{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[u'austen-emma.txt',\n u'austen-persuasion.txt',\n u'austen-sense.txt',\n u'bible-kjv.txt',\n u'blake-poems.txt',\n u'bryant-stories.txt',\n u'burgess-busterbrown.txt',\n u'carroll-alice.txt',\n u'chesterton-ball.txt',\n u'chesterton-brown.txt',\n u'chesterton-thursday.txt',\n u'edgeworth-parents.txt',\n u'melville-moby_dick.txt',\n u'milton-paradise.txt',\n u'shakespeare-caesar.txt',\n u'shakespeare-hamlet.txt',\n u'shakespeare-macbeth.txt',\n u'whitman-leaves.txt']"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#古腾堡语料库\n",
    "\n",
    "import nltk\n",
    "nltk.corpus.gutenberg.fileids()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "192427\n['_MAX_REPR_SIZE', '__add__', '__class__', '__contains__', '__delattr__', '__dict__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__mul__', '__ne__', '__new__', '__radd__', '__reduce__', '__reduce_ex__', '__repr__', '__rmul__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__unicode__', '__weakref__', '_cache', '_current_blocknum', '_current_toknum', '_encoding', '_eofpos', '_fileid', '_filepos', '_len', '_open', '_stream', '_toknum', 'close', 'count', 'fileid', 'index', 'iterate_from', 'read_block', 'unicode_repr']\n"
     ]
    }
   ],
   "source": [
    "#简·奥斯丁的《爱玛》——并给它一个简短的名称 emma，然后找出它包 多少个词\n",
    "\n",
    "emma = nltk.corpus.gutenberg.words('austen-emma.txt')\n",
    "print (len(emma))\n",
    "print (dir(emma))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[u'i',\n u'do',\n u\"n't\",\n u'want',\n u'hot',\n u'pics',\n u'of',\n u'a',\n u'female',\n u',',\n u'I',\n u'can',\n u'look',\n u'in',\n u'a',\n u'mirror',\n u'.']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#即时消息聊天会话语料库\n",
    "from nltk.corpus import nps_chat\n",
    "chatroom = nps_chat.posts('10-19-20s_706posts.xml')\n",
    "chatroom[123]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[u'The', u'Fulton', u'County', u'Grand', u'Jury', u'said', u'Friday', u'an', u'investigation', u'of', u\"Atlanta's\", u'recent', u'primary', u'election', u'produced', u'``', u'no', u'evidence', u\"''\", u'that', u'any', u'irregularities', u'took', u'place', u'.'], [u'The', u'jury', u'further', u'said', u'in', u'term-end', u'presentments', u'that', u'the', u'City', u'Executive', u'Committee', u',', u'which', u'had', u'over-all', u'charge', u'of', u'the', u'election', u',', u'``', u'deserves', u'the', u'praise', u'and', u'thanks', u'of', u'the', u'City', u'of', u'Atlanta', u\"''\", u'for', u'the', u'manner', u'in', u'which', u'the', u'election', u'was', u'conducted', u'.'], ...]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#布朗语料库\n",
    "#我们可以将语料库作为词链表或者句子链表来访问(每个句子 身也是一个词链表 )。 \n",
    "# 我们可以指定特定的类别或文件阅读:\n",
    "from nltk.corpus import brown\n",
    "brown.categories()\n",
    "brown.words(categories='news')\n",
    "brown.sents(categories=['news', 'editorial', 'reviews'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('can:', 94)\n('could:', 87)\n('may:', 93)\n('might:', 38)\n('must:', 53)\n('will:', 389)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEUCAYAAAA8+dFZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl4FeX5//H3fbIHQsJuWBQRREEQSECUWmu1brVVa+tS\nFbRW6lKr9Vur/bXWpbbaarUudaFCBbRq1baCO3WnLkAAUQRkVZA1EMKSjST3748zkRADJJBz5iT5\nvK5rrnPmmZkzn4SQO/PM8pi7IyIi0lCRsAOIiEjzosIhIiKNosIhIiKNosIhIiKNosIhIiKNosIh\nIiKNosIhIiKNosIhIiKNosIhIiKNkhx2gFjo1KmT9+rVa6+3Ly0tJSMjo+kCNdMMyqEciZ5BOZo2\nR0FBQaG7d97jiu7e4qa8vDzfFzNnztyn7ZtCImRwV466lCOxMrgrR137kgOY6Q34HauuKhERaRQV\nDhERaRQVDhERaRQVDhERaRQVDhERaRQVDhERaRQVjjoWrNmMa1REEZFdUuGoZfy0ZZxyzzu8tLgk\n7CgiIglLhaOWLu3SqHZ49MMtTF+2Mew4IiIJSYWjllMHdWPM13tT5XD54wWsKS4LO5KISMJR4ajj\nlyf2Y2CXVAq3VnDpYwWUV1aFHUlEJKGocNSRnBThmhE5dM/JYM6KTdw0eV7YkUREEooKRz3apUV4\n6Pw8UpMjPDF9BU9M/zzsSCIiCUOFYxcG9sjmtjMGAnDjc/OY/XlRyIlERBKDCsdunJnXg9FHHkBF\nVTWXPTaL9VvKw44kIhI6FY49+M2p/RnWqz1rNpdxxeOz2F5VHXYkEZFQqXDsQUpShL+eN5Su7dKY\nvnwjv39hftiRRERCpcLRAF2y0nngvDxSkoxH313Ov2atDDuSiEhoYlo4zCzHzJ4xswVmNt/MjjSz\nDmY21cwWBa/tg3XNzO41s8VmNtfMhtb6nNHB+ovMbHQsM+9K3gHtuem7AwD41b8+4uMvisOIISIS\nulgfcdwDvOzuhwCHA/OB64HX3L0v8FowD3Ay0DeYxgAPAphZB+BG4AhgOHBjTbGJtx8O35+z83tS\nXlnNTyYVsHFbRRgxRERCFbPCYWbtgK8D4wDcvcLdNwGnAROC1SYApwfvTwMmBmOmvw/kmFkucCIw\n1d03unsRMBU4KVa5d8fMuPm0ARzeM4cvNpXysydmU6mT5SLSysTyiKM3sB74u5nNNrNHzKwN0NXd\nVwMEr12C9bsDK2ptvzJo21V7KNJTknjo/KF0apvKtMWF3PHqwrCiiIiEwmI19oSZ5QPvAyPd/QMz\nuwfYDFzp7jm11ity9/Zm9gJwm7tPC9pfA34JfBNIc/dbg/YbgBJ3/3Od/Y0h2sVFbm5u3pQpU/Y6\ne0lJCZmZmbtdZ976Cm56ayPVDteMyGZkz4y93t/eZogH5VCORM6gHE2bIz8/v8Dd8/e4orvHZAL2\nA5bXmj8aeAFYCOQGbbnAwuD9w8C5tdZfGCw/F3i4VvtO69U35eXl+b6YOXNmg9Yb985SP+C65/3Q\nG17yBas379M+9zZDrCnHzpQjsTK4K0dd+5IDmOkN+P0es64qd18DrDCzfkHTccAnwGSg5sqo0cBz\nwfvJwKjg6qoRQLFHu7JeAU4ws/bBSfETgrbQXTSyF6cP7kZJRRU/mTST4tLtYUcSEYm55Bh//pXA\n42aWCiwFLiJ6XuWfZnYx8Dnwg2DdF4FTgMVASbAu7r7RzH4HzAjWu8XdE2KUJTPjtu8N4tO1W/lk\n9WaufnI240YPIxKxsKOJiMRMTAuHu88B6usvO66edR24YhefMx4Y37TpmkZGahIPX5DHd+6fxhsL\n1/OX1xZxzbcODjuWiEjM6M7xJtCzQyb3nTuEiMG9ry1i6idrw44kIhIzKhxN5Oi+nbn2xEMA+PlT\nc1iyfmvIiUREYkOFowldekxvThm4H1vLK/nJpAK2lleGHUlEpMmpcDQhM+NP3z+cvl3asnjdVn7x\nzw9rLiEWEWkxVDiaWNu0ZB6+II+stGRenreGB95cEnYkEZEmpcIRA707t+Uv5wwG4M5XF/LWp+tD\nTiQi0nRUOGLkuEO7cvXxfXGHnz0xm883lIQdSUSkSahwxNDPvtmX4w/tQnHpdsZMmklJhU6Wi0jz\np8IRQ5GIcdfZgzmwUxsWrNnC9c9+pJPlItLsqXDEWLv0FB6+II/M1CQmf7iKcdOWhR1JRGSfqHDE\nwcFds7jzB4cDcNtLC3h3SWHIiURE9p4KR5ycMjCXS485iKpq58p/zOaLTaVhRxIR2SsqHHF07Yn9\nOLpvJzZsq+Cyxwoo214VdiQRkUZT4YijpIhx7zlD6NE+g7kri7nhPx/rZLmINDsqHHHWvk0qD1+Q\nR3pKhKcLVvLYB5+HHUlEpFFUOEIwoFs2t39vEAC3TJlHwWcJMS6ViEiDqHCE5PQh3bloZC+2VzmX\nPjaLdZvLwo4kItIgKhwh+n+nHMoRB3Zg/ZZyLnt8FhWV1WFHEhHZIxWOEKUkRbj/h0PZr106BZ8V\nccvz88KOJCKyRyocIeuclcZDF+SRmhThsfc/558zV4QdSURkt1Q4EsDgnjn87vQBAPzmPx8zd+Wm\nkBOJiOyaCkeCOHvY/vzwiP2pqKzm0kkFFJfp5kARSUwqHAnkxu/0Z8j+OawqLuPP7xdTWaWT5SKS\neFQ4EkhachIPnZ9Hp7ZpzFtfwa0vzA87kojIV6hwJJiu7dJ56PyhJBs8+u5ynpyuO8tFJLGocCSg\n/F4dGJPXDoAbnvuYGct1Z7mIJA4VjgR13IGZO+4sn1Sgx7CLSMKIaeEws+Vm9pGZzTGzmUFbBzOb\namaLgtf2QbuZ2b1mttjM5prZ0FqfMzpYf5GZjY5l5kTy61MO5Wt9oo9hv2SCxiwXkcQQjyOOY919\nsLvnB/PXA6+5e1/gtWAe4GSgbzCNAR6EaKEBbgSOAIYDN9YUm5YuOSnC/T8cQq+OmXyyejO/ePpD\nqqv1GHYRCVcYXVWnAROC9xOA02u1T/So94EcM8sFTgSmuvtGdy8CpgInxTt0WHIyU3lkdD5Zacm8\n+NEa7nt9cdiRRKSVs1gOJGRmy4AiwIGH3X2smW1y95xa6xS5e3szex643d2nBe2vAdcB3wDS3f3W\noP0GoNTd76yzrzFEj1TIzc3NmzJlyl7nLikpITMzc6+3bwp1MxSsLue2aUU4cO2ROYzokR5KjrAo\nR+LlSIQMytG0OfLz8wtq9Q7tUvJefXrDjXT3VWbWBZhqZgt2s67V0+a7ad+5wX0sMBYgPz/f8/Ly\n9iYvAAUFBezL9k2hboY8wLOWcNtLC7h/5haOHTaQ/t3axT1HWJQj8XIkQgblCCdHTLuq3H1V8LoO\n+DfRcxRrgy4ogtd1weorgZ61Nu8BrNpNe6sz5uu9OWNId0q3V3HJxJls2FoediQRaYViVjjMrI2Z\nZdW8B04APgYmAzVXRo0GngveTwZGBVdXjQCK3X018Apwgpm1D06KnxC0tTpmxm3fG8jhPXP4YlOp\nxvAQkVDE8oijKzDNzD4EpgMvuPvLwO3At8xsEfCtYB7gRWApsBj4G3A5gLtvBH4HzAimW4K2Vik9\nJYmxF+TRtV0a05dt5MbJ84jleSoRkbpido7D3ZcCh9fTvgE4rp52B67YxWeNB8Y3dcbmqmu7dMZe\nkM8PHn6PJ6Z/zqG5WYw6slfYsUSkldCd483U4T1z+NOZgwC4econvLu4MOREItJaqHA0Y6cP6c6l\nxxxEVbVz+T9m8dmGbWFHEpFWQIWjmbv2xH5885AubCrZzo8nzGRL2fawI4lIC6fC0cwlRYx7zhlM\nny5tWbRuKz9/ao4eSyIiMaXC0QJkpafwyKh8sjNS+O/8ddz56sKwI4lIC6bC0UL06tSGB84bSlLE\neODNJTw354uwI4lIC6XC0YKM7NOJG759KAC/fGYuc1duCjmRiLREKhwtzOijenHOsJ6UV1YzZmIB\n6zaXhR1JRFoYFY4Wxsy45bTDGNarPWs2lzFmUgFl26vCjiUiLYgKRwuUmhzhwfPz6J6TwZwVm/j1\nvz/WY0lEpMmocLRQndqmMXZUHhkpSTw7ayXjpi0LO5KItBAqHC3YgG7Z3HVW9HFhf3hxPm8sXLeH\nLURE9kyFo4U7eWAuVx3Xl2qHn/1jNovXbQ07kog0cyocrcBVx/XlpAH7saW8kjETZ1JcoseSiMje\nU+FoBSIR489nHc4h+2WxtHAbVz45m8oqDQAlIntHhaOVaJOWzCOj8+nYJpW3P13P7S/tbvh3EZFd\nU+FoRXq0z+TB8/NIjhiPTFvG0zNXhB1JRJohFY5WZviBHfjd6YcB8Ot/f0zBZ612FF4R2UsqHK3Q\nucP3Z/SRB1BRVc1PJs1i1abSsCOJSDOiwtFK/ebU/hx1UEcKt5YzZtJMSiv0WBIRaRgVjlYqJSnC\nX384lAM6ZvLxF5u59pkP9VgSEWkQFY5WrH2bVP42Kp+2ack8P3c1D7y5JOxIItIMqHC0cgd3zeIv\nZw/GDO54ZSGvzlsTdiQRSXAqHMLx/bty7Yn9APj5U3NYsGZzyIlEJJGpcAgAlx1zEKcN7sa2iiou\nmTiTjdsqwo4kIglKhUOA6ABQfzxzEIN6ZLNiYymXP17Adj2WRETqEfPCYWZJZjbbzJ4P5g80sw/M\nbJGZPWVmqUF7WjC/OFjeq9Zn/CpoX2hmJ8Y6c2uVnpLE2Avy6ZyVxvtLN3LLlE/CjiQiCSgeRxxX\nAfNrzf8RuNvd+wJFwMVB+8VAkbv3Ae4O1sPM+gPnAAOAk4AHzCwpDrlbpf2y0xl7QR6pyREmvf8Z\nrywpCTuSiCSYmBYOM+sBfBt4JJg34JvAM8EqE4DTg/enBfMEy48L1j8NeNLdy919GbAYGB7L3K3d\nkP3bc9sZAwEYN3sz7y/dEHIiEUkkjS4cZtbezAY1cPW/AL8EajrLOwKb3L0ymF8JdA/edwdWAATL\ni4P1v2yvZxuJkTPzejDm672pcrj88VmsLNKRh4hEJTdkJTN7E/husP4cYL2ZveXu1+xmm1OBde5e\nYGbfqGmuZ1Xfw7LdbVN7f2OAMQC5ubkUFBTsKtoelZSU7NP2TSERMhzfxXmvcxIfra/ggoff4dZj\nO5CeHM71FInw/VCOxMugHOHkaFDhALLdfbOZ/Rj4u7vfaGZz97DNSOC7ZnYKkA60I3oEkmNmycFR\nRQ9gVbD+SqAnsNLMkoFsYGOt9hq1t/mSu48FxgLk5+d7Xl5eA7+0ryooKGBftm8KiZAB4BfbZ3DT\n/7axrHAb/1iSxP3nDiHagxhfifL9UI7EyqAc4eRo6J+PyWaWC5wFPN+QDdz9V+7ew917ET25/bq7\nnwe8AXw/WG008FzwfnIwT7D8dY8+PGkycE5w1dWBQF9gegNzyz5qmxrhb6PyaJuWzAt6LImI0PDC\ncTPwCrDY3WeYWW9g0V7u8zrgGjNbTPQcxrigfRzQMWi/BrgewN3nAf8EPgFeBq5wdz3KNY76dNnx\nWJI7X13Ifz9ZG3YkEQlRQwvHancf5O6XA7j7UuCuhu7E3d9091NrtnX34e7ex91/4O7lQXtZMN8n\nWL601va/d/eD3L2fu7/U8C9Pmsrx/bvyixP64Q5XPzWHxeu2hB1JRELS0MJxXwPbpAW7/BsH8e1B\nuWwtr+SSiQUUl2wPO5KIhGC3J8fN7EjgKKCzmdW+gqodoJvwWhkz447vD2LZ+m18snozVz45m79f\nOIykSPxPlotIePZ0xJEKtCVaYLJqTZvZcYJbWpHM1GTGjsqjQ5tU3v50PX96eUHYkUQkznZ7xOHu\nbwFvmdmj7v5ZnDJJguvRPpMHzhvK+Y98wMNvL+WQ3CzOGNIj7FgiEicNPceRZmZjzexVM3u9Zopp\nMkloI3p35MbvDgDgumc/Yu7KTSEnEpF4aegNgE8DDxF95pQuhRUAzj9ifz5ZtZknpn/OmIkFTL5y\nJF2y0sOOJSIx1tAjjkp3f9Ddp7t7Qc0U02SS8MyMm787gGG92rNmcxmXPTaL8kr9XSHS0jW0cEwx\ns8vNLNfMOtRMMU0mzUJqcoQHzsujW3Y6BZ8V8dv/zCN6w7+ItFQNLRyjgWuBd4GCYJoZq1DSvHTO\nSmPsqHzSkiM8NXMFE9/TdRQiLVmDCoe7H1jP1DvW4aT5OKx7Nn/6fvRp+7c8/wnvLikMOZGIxEpD\nH6s+qr52d5/YtHGkOTttcHfmr97CQ28t4YrHZzH5p1+jZ4fMsGOJSBNraFfVsFrT0cBNRMfnENnJ\ntSf249h+nSkq2c4lE2eyrbxyzxuJSLPS0K6qK2tNlwBDiN5VLrKTpIhxz7lD6N25DQvWbOEXT3+o\nk+UiLczeDudWQnRcDJGvaJeewt9G5ZOVlsxLH6/hvtcXhx1JRJpQgwqHmU0xs8nB9AKwkB0DMIl8\nxUGd23LvuUMwg7umfsqr89aEHUlEmkhDjzjuBP4cTH8Avu7u18cslbQIxx7ShV+eeAgAP39qDp+u\n1RgeIi1BQ89xvAUsIPpk3PZARSxDSctx6TG9+e7h3dhWUcUlE2eyqUQ/OiLNXUO7qs4iOs73D4iO\nO/6Bmemx6rJHZsYfzxzEYd3b8dmGEq58YjaVVdVhxxKRfdDQrqpfA8PcfbS7jwKGAzfELpa0JBmp\nSTx8QT6d2qbyzqJCbntJY3iINGcNLRwRd19Xa35DI7YVoXtOBg+en0dKkjFu2jKeKVgZdiQR2UsN\n/eX/spm9YmYXmtmFwAvAi7GLJS3RsF4duPm7hwHw//79EbM/Lwo5kYjsjd0WDjPrY2Yj3f1a4GFg\nEHA48B4wNg75pIX54RH7c/6I/amorOYnkwpYu7ks7Egi0kh7OuL4C7AFwN3/5e7XuPvPiR5t/CXW\n4aRluvE7Axh+YAfWbSnnJ5MKKNuuMTxEmpM9FY5e7j63bqO7zwR6xSSRtHgpSREePG8o3XMymLNi\nE7/5z8d6LIlIM7KnwrG7cUAzmjKItC4d26YxdlQe6SkRnilYyd//tzzsSCLSQHsqHDPM7JK6jWZ2\nMdHBnET22oBu2dz5g8MB+P2L85m2SGN4iDQHeyocVwMXmdmbZvbnYHoL+DFwVezjSUt36qBuXHHs\nQVRVO1f8YxafbdgWdiQR2YPdFg53X+vuRwE3A8uD6WZ3P9Ldd/vUOjNLN7PpZvahmc0zs5uD9gPN\n7AMzW2RmT5lZatCeFswvDpb3qvVZvwraF5rZifvyBUvi+b9v9eP4Q7tQXBodw2OrxvAQSWgNfVbV\nG+5+XzC93sDPLge+6e6HA4OBk8xsBPBH4G537wsUARcH618MFLl7H+DuYD3MrD9wDjAAOAl4wMyS\nGphBmoFIxLj77MH06dKWT9du5Zqn5lBdrZPlIokqZnd/e9TWYDYlmBz4JvBM0D4BOD14f1owT7D8\nODOzoP1Jdy9392XAYqKPPJEWJCsYw6NdejKvfrKWe15bFHYkEdkFi+VlkMGRQQHQB/grcAfwfnBU\ngZn1BF5y98PM7GPgJHdfGSxbAhxBdJja9939saB9XLDNM3X2NQYYA5Cbm5s3ZcqUvc5dUlJCZma4\nY2UnQoYwcsxeU84f3imiGrj2yBxG9EgPJceuKEdiZVCOps2Rn59f4O75e1ovea8+vYHcvQoYbGY5\nwL+BQ+tbLXi1XSzbVXvdfY0luJs9Pz/f8/Ly9iozQEFBAfuyfVNIhAxh5MgDqrOW8IcXF/DXgi0c\nd8RADtmvXav9fiRyjkTIoBzh5IjLgwrdfRPwJjACyDGzmoLVA1gVvF8J9AQIlmcDG2u317ONtECX\nHN2bM4Z0pyQYw6Nom8bwEEkkMSscZtY5ONLAzDKA44H5wBtAzVgeo9kxBO3kYJ5g+ese7UebDJwT\nXHV1INGxzqfHKreEz8y47XsDGdQjmxUbS7niH7Oo0slykYQRyyOOXOANM5sLzACmuvvzwHXANWa2\nGOgIjAvWHwd0DNqvAa4HcPd5wD+BT4CXgSuCLjBpwdJTknj4gjw6tU3j3SUbeGT2ZhUPkQQRs3Mc\nwTOuhtTTvpR6ropy9zKiIwzW91m/B37f1BklseVmZ/DwBUM5Z+z7vLq0lLMffo+7zx5Mzw7hn4AU\nac00GJMktLwDOjDhouG0T48w87MiTrnnHf4z+4uwY4m0aiockvCO6tOJu0/oxIkDurKlvJKrn5rD\nVU/Oprh0e9jRRFolFQ5pFrLSIjx0fh5/PHMgGSlJPDdnFafc8w7Tl20MO5pIq6PCIc2GmXH2sP15\n4WdfY1CPbL7YVMo5Y9/jzlcWsr2qOux4Iq2GCoc0O707t+XZy47ip8f2wYH731jM9x98l2WFerKu\nSDyocEizlJIU4Rcn9uPJS0bQPSeDD1cW8+173+GpGZ9rNEGRGFPhkGbtiN4defGqo/nu4d0oqaji\numc/4rLHZuluc5EYUuGQZi87I4V7zx3CX84eTNu0ZF6et4aT7nmb/y3WiIIisaDCIS3G6UO689JV\nR5N/QHvWbi7nvEc+4PcvfEJ5pR40INKUVDikRenZIZMnx4zgmm8dTFLE+Ns7yzj9r++yaO2WsKOJ\ntBgqHNLiJCdF+NlxfXn60iM5oGMm81dv5tT7pjHpveU6cS7SBFQ4pMUaun97XvjZ0fwgrwflldXc\n8Nw8Lp4wk/VbysOOJtKsqXBIi9Y2LZk7fnA4D5w3lOyMFF5fsI6T73mbNxasCzuaSLOlwiGtwikD\nc3n56qM5sndHCrdWcNGjM/jtcx9Ttl0nzkUaS4VDWo3c7Awe//ER/OrkQ0hJMia+9xnfuW8a81YV\nhx1NpFlR4ZBWJRIxfnLMQfz78pH07tyGReu2csZf3+WRd5ZSrYGiRBpEhUNapcO6Z/PClUdz/oj9\nqaiq5tYX5jNq/HTWFJeFHU0k4alwSKuVkZrEracP5JFR+XRok8q0xYWcdM/bvPzxmrCjiSQ0FQ5p\n9Y7v35WXrz6aYw7uzKaS7Vz6WAHXPzuXbeWVYUcTSUgqHCJAl6x0Hr1oGDd9pz+pyRGenLGCU++b\nxocrNoUdTSThqHCIBMyMC0ceyJSffo1D9stiWeE2znzwXf76xmKqdOJc5EsqHCJ19Nsvi/9cMZKL\nv3YgldXOHa8s5Nyx77OyqCTsaCIJITnsACKJKD0liRtO7c8xB3fm/57+kOnLN3LyPe9wVPcUeq2b\nT5vUZDJTk8hITaJNajIZqUlkpiaRGbTvtCwliUjEwv6SRJqMCofIbnz94M68cvXXue7ZuUz9ZC2v\nLKmEJUsb/TnpKZFdFpjMXRahry6reV9UWkXh1nKSI0ZSxEiORIhEiL5atNtNJFZUOET2oEObVMZe\nkMebn65n2uwFdNyvGyXlVZRUVFG6vZJtdd6XVlRRsr2y1jpVlG2vpmx7BTTlsOjP/3eXi5IjRiRi\ntQpL9LWmyCTtNG+7mY/smDcjKWnH8s1FxQzY+CndczLIzUmnW04G3bIzyEhNasIvUhKRCodIA5gZ\nx/brQrutK8jL69OobaurndLtQRGpqGJbReWX70uC9yV13pdWVLKt1jrb6qxfWl5BJCmZyqpqqh0q\nq6upqnYqqx13qKx2qHZiPYDuf5ct+kpb+8wUuuVkkJudQfegoOTm7HjfJSudJHXdNWsqHCIxFokY\nbdKSaZPWdP/dCgoKyMvLq3dZdbVT5f5lIamqrnlf/eX7ust2zFdTVb1zIaqutW7tdRYuWUZadhdW\nFZeyalMpqzaVsaa4jKKS7RSVbGfeqs315kuKGPu1Syc3OzhKycmgW0463bJ3vM/OSFF3WwKLWeEw\ns57ARGA/oBoY6+73mFkH4CmgF7AcOMvdiyz6U3IPcApQAlzo7rOCzxoN/Cb46FvdfUKscos0d5GI\nEcFIiXGPUQFrycvrt1NbdbVTuK2c1ZvKWLWplC82lbK6OPp+VfC6fks5XwTL+Kyo3s/OTE36srB0\nD45euuWkB91iGeRmp5Me6y9QdimWRxyVwP+5+ywzywIKzGwqcCHwmrvfbmbXA9cD1wEnA32D6Qjg\nQeCIoNDcCOQDHnzOZHev/ydOREITiRhdstLpkpXO4T1z6l2nvLKKtcXR4rFqUymri0v5YlMZq2sd\nuWwtr2TJ+m0sWb/rk0Id26TSLSeDrqkV/PnQ7WRnpsTqy5I6YlY43H01sDp4v8XM5gPdgdOAbwSr\nTQDeJFo4TgMmenRsz/fNLMfMcoN1p7r7RoCg+JwEPBGr7CISO2nJSezfMZP9O2bucp3NZduDIhIt\nJKuCI5eaYrOmuIwN2yrYsK2Cj4CLJ8zgsR8foaOQOLF4jMFsZr2At4HDgM/dPafWsiJ3b29mzwO3\nu/u0oP01ogXlG0C6u98atN8AlLr7nXX2MQYYA5Cbm5s3ZcqUvc5bUlJCZuauf6jjIREyKIdyJGqG\nKneKy6pZvbWKv7xfxMYyZ1i3NK49Mie0E++J8G+yrzny8/ML3D1/T+vF/OS4mbUFngWudvfNuznh\nVd8C3037zg3uY4GxAPn5+b6rE4cNsbsTj/GSCBmUQzkSPQNAu7QPuPHtYmasKufZz1K5/cyBoZxY\nT5TvRzxyxPSRI2aWQrRoPO7u/wqa1wZdUASvNYM/rwR61tq8B7BqN+0iIvRsl8z4C/NJT4nw1MwV\n3DX107AjtXgxKxzBVVLjgPnufletRZOB0cH70cBztdpHWdQIoDg4T/IKcIKZtTez9sAJQZuICAB5\nB3Tg/nOHkhQx7nt9MRPeXR52pBYtlkccI4ELgG+a2ZxgOgW4HfiWmS0CvhXMA7wILAUWA38DLgcI\nTor/DpgRTLfUnCgXEalxfP+u3Pa9gQDcNGUeL8xdHXKiliuWV1VNo/7zEwDH1bO+A1fs4rPGA+Ob\nLp2ItERn5fdk/ZZy7nhlIT9/ag7t26Rw1EGdwo7V4uix6iLSolz+jYO48KheVFRVM2ZiAR9/URx2\npBZHhUNEWhQz47en9ufUQblsLa/kwr/P4PMNGkulKalwiEiLE4kYfz7rcEb26Ujh1nJGjf+Awq3l\nYcdqMVQ4RKRFSktO4qHz8xjQrR3LN5Rw0d9nsLW8MuxYLYIKh4i0WFnpKTx60XAO6JjJR18Uc+mk\nAioqq8ODzlIAAAAOfElEQVSO1eypcIhIi9Y5K42JPxpOp7apTFtcyC+e/pDq6tg/aqklU+EQkRbv\ngI5tePSi4bRJTWLyh6u49YX5xOM5fS2VCoeItAqHdc9m7Kh8UpKM8f9bxsNvN37seIlS4RCRVmNk\nn07cddZgzOD2lxbwTMHKsCM1SyocItKqfOfwbtx4an8Arnt2Lq8vWBtyouZHhUNEWp0LRx7IFcce\nRFW1c/njs5j1uQYUbQwVDhFplX5xQj/Oyu9B2fZqfvToDBav2xJ2pGZDhUNEWiUz4w9nDOS4Q7qw\nqWQ7o8ZNZ3VxadixmgUVDhFptZKTItz/w6EM3T+HVcVljB4/neKS7WHHSngqHCLSqmWkJjH+wmH0\n6dKWT9du5ccTZ1C2vSrsWAlNhUNEWr2czFQm/mg4udnpzFhexJVPzKaySo8m2RUVDhERoFtOBhN+\nNJzsjBSmfrKWG577WHeX74IKh4hI4OCuWYy/MJ/0lAhPTF/B3VM/DTtSQlLhEBGpJe+ADtx/7lCS\nIsa9ry9m0nvLw46UcFQ4RETqOL5/V247YyAAv508jxc/Wh1yosSiwiEiUo+zhvXk2hP74Q5XPzmH\nd5cUhh0pYahwiIjswuXfOIgLj+pFRVU1P5lYwLxVxWFHSggqHCIiu2Bm/PbU/nx7UC5byiu58O8z\nWLGxJOxYoVPhEBHZjUjEuOuswxnZpyPrt5RzwbgPKNxaHnasUKlwiIjsQVpyEg+dn8eAbu1YvqGE\nHz06g63llWHHCo0Kh4hIA2Slp/DoRcPZv0Mmc1cWc9ljBVRUts67y2NWOMxsvJmtM7OPa7V1MLOp\nZrYoeG0ftJuZ3Wtmi81srpkNrbXN6GD9RWY2OlZ5RUT2pHNWGhN/NJxObVN5Z1Eh1z7zIdXVre/u\n8lgecTwKnFSn7XrgNXfvC7wWzAOcDPQNpjHAgxAtNMCNwBHAcODGmmIjIhKGXp3a8OhFw2mTmsRz\nc1bx+xfnt7pHk8SscLj728DGOs2nAROC9xOA02u1T/So94EcM8sFTgSmuvtGdy8CpvLVYiQiEleH\ndc/m4QvySUkyxk1bxti3l4YdKa7ifY6jq7uvBgheuwTt3YEVtdZbGbTtql1EJFRf69uJu84ajBnc\n9tICnl+0jS82lbaKriuL5SGWmfUCnnf3w4L5Te6eU2t5kbu3N7MXgNvcfVrQ/hrwS+CbQJq73xq0\n3wCUuPuf69nXGKLdXOTm5uZNmTJlr3OXlJSQmZm519s3hUTIoBzKkegZEiHHC4u2MX7OjmFnUyOw\nX9tkcrOS6JaVTG7b6Gu3tkm0S4tgZjHNsy/fj/z8/AJ3z9/Tesl79el7b62Z5br76qAral3QvhLo\nWWu9HsCqoP0bddrfrO+D3X0sMBYgPz/f8/Ly9jpkQUEB+7J9U0iEDMqhHImeIRFy5OVB716f8fi0\nhRSWR1i/pZzPN1fy+eZKYOf7PbLSkjmwcxsO7LTz1KtTG9qlpzRJnnh8P+JdOCYDo4Hbg9fnarX/\n1MyeJHoivDgoLq8Af6h1QvwE4FdxziwislvnjziAQ1MKycvLY0vZdpYXlrC0cCvLC0tYVriVZYXb\nWFq4jS1llcxdWczclV99dEmntqk7FZLendpwYKe2HNAxk/SUpBC+ql2LWeEwsyeIHi10MrOVRK+O\nuh34p5ldDHwO/CBY/UXgFGAxUAJcBODuG83sd8CMYL1b3L3uCXcRkYSRlZ7CwB7ZDOyRvVO7u7Nx\nWwXLCrd9ZVq+YRuFWyso3FrBjOVFO21nBt2yM+opKm3o0T6D5KT4344Xs8Lh7ufuYtFx9azrwBW7\n+JzxwPgmjCYiEndmRse2aXRsm0Z+rw47LauudtZsLvtqQSncxucbS/hiUylfbCpl2uKdn9CbHDH2\n75D5ZUE5sFMbKovKGVLtRCKxO5cS764qERGpIxIxuuVk0C0ng5F9Ou20bHtVNSuLSlkedHct+7IL\nLHoV19KgvUZmsjE6xjctqHCIiCSwlKTIl91Ux9ZZVra9iuUbtu0oKuu3UbRxQ8yv3FLhEBFpptJT\nkjhkv3Ycsl+7L9sKCgpivl895FBERBpFhUNERBpFhUNERBpFhUNERBpFhUNERBpFhUNERBpFhUNE\nRBpFhUNERBolpuNxhMXM1gOf7cNHdAIK97hWbCVCBlCOupQjsTKActS1LzkOcPfOe1qpRRaOfWVm\nMxsymElLz6AcypHoGZQjnBzqqhIRkUZR4RARkUZR4ajf2LADkBgZQDnqUo4dEiEDKEddMc+hcxwi\nItIoOuIQEZFGUeEQEZFG0UBOCczMcoGN7l4edhaRGmbWHugLpNe0ufvb4SVq3cwsre7viPrampKO\nOBLbJGCBmd0ZdhAz2y9O+5kUvF4Vj/01J2bW1cxODaYuIWX4MfA28Apwc/B6U0hZjjKzH5rZqJop\njvtOMrPH4rW/PXivgW1NRkccRP9DAn8Aurn7yWbWHzjS3ceFmcvdj7fo4MH9w8wRGAd8Ow77yTOz\nA4AfmdlEYKfBk919YxwyYGZbgF1eOeLu7Xa1LBbM7CzgDuBNot+T+8zsWnd/Jp45gKuAYcD77n6s\nmR1CtIDEVfAHxkHAHKAqaHZgYjz27+5VZtbZzFLdvSIe+6wr+GOuO5BhZkPY8X+lHZAZy32rcEQ9\nCvwd+HUw/ynwFNFflqHy6GVv8xIgRzyKBsBDwMtAb6D24MlG9BdD73iEcPcsADO7BVhD9OjPgPOA\nrHhkqOPXwDB3Xxfk6gz8F4h34Shz9zIzq+kOWWBm/eKcASAf6O/hXha6HPifmU0GttU0uvtdcdr/\nicCFQA+g9j63AP8vljtW4Yjq5O7/NLNfAbh7pZlV7WkjaXrufi9wr5k9SLSIfD1Y9La7fxhCpBPd\n/Yha8w+a2QfAn+KcI1JTNAIbCKereaWZ5QD/AaaaWRGwKoQcHwP7AatD2HeNVcEUIYQ/Jtx9AjDB\nzM5092fjuW8VjqhtZtaRoGvCzEYAxeFGavUWAI8B/yL6l/4kM/ubu98X5xxVZnYe8CTRn49z2dE1\nEk8vmdkrwBPB/NnAi/EO4e5nBG9vMrM3gGyiR4jx1gn4xMymA1+eBHb378YrgLvfDGBmWdFZ3xqv\nfdfJ8ayZfRsYwM4XLNwSq33qBkDAzIYC9wGHEf1LpjPwfXefG2qwVszM5hI9z7QtmG8DvOfug+Kc\noxdwDzCSaOH4H3C1uy+Pc44/Ah8AXyNaSN8GRrj7dfHMkSjM7Jj62t39rThmOIxoF2aHoKkQGOXu\nce1aNrOHiJ7TOBZ4BPg+MN3dL47ZPlU4oswsGehH9D/lQnffHnKkVs3MPiLap18WzKcDM9x9YLjJ\nwmFms9x9aJ22ufEupLKDmb0L/Nrd3wjmvwH8wd2PinOOue4+qNZrW+Bf7n5CrPaprqodhgO9iH5P\nhpoZ7h6XKzSkXn8HPjCzfwfzpxPCxQrBSehL2PGzAYC7/yhO+78MuBzoHRyF1cgievTTqpjZNHf/\nWj1XvRnR7qJ4Xu3WpqZoEN35m8GRcbyVBq8lZtaN6PmvA2O5QxUOwr+0T77K3e8yszfZ0TVzkbvP\nDiHKc8A7RK9gCuPcxj+Al4DbgOtrtW+J16XJicTdvxa8hnFlW11LzewGot1VAOcDy0LI8XxwwcId\nwCyiv7seieUO1VUFmNl8wr+0TxKQmc1x98Fh55DEYWaT3P0CM7uG6JFozR83bwE3u3tRiNnSgHR3\nj+nFPTriiEqES/skMT1vZqe4e9yvYJKEVXOT6miiJ6Rr7jGCOjesxouZHUWt7tRYd7W36iMOM5tC\n9B88CxgMhHZpnySmoC+9DdGfi+2E05cuCcTMfgZcRvRm1C9qLyL6sxGXm1Rr5am3q93dfxazfbby\nwnEM0X/sPwK/rL0I+GOdG7+klTKzDnz1oX5xu+xTEpOZPejulyVAjrh3tbfqrqqa//xmllL3F4GZ\nZYSTShJJ8FC/q4g+1mEOMAJ4FzguzFwSvkQoGoG4d7W36sKhSx2lARLioX4iddXpao/rXfStunCg\nSx1lzxLloX4idd3Jjq7202u117TFTKsuHMEla8VEnz8kUp9EeaifyE7C7Gpv1SfHRRojuJgiG3g5\nrDEYRGrU7moHltRalAX8z93Pj9m+VThERJofM8sG2hNCV7sKh4iINIrGHBcRkUZR4RARkUZR4RDZ\nAzP7tZnNM7O5ZjbHzGL2RAEze9PM8mP1+SJNoVVfjiuyJ2Z2JHAqMNTdy82sE5AaciyRUOmIQ2T3\ncoFCdy8HcPdCd19lZr81sxlm9rGZjTUzgy+PGO42s7fNbL6ZDTOzf5nZIjO7NVinl5ktMLMJwVHM\nM2aWWXfHZnaCmb1nZrPM7OlgZDfM7HYz+yTY9s44fi9EABUOkT15FehpZp+a2QO1xrq+392Hufth\nQAbRo5IaFe7+deAhogNBXUF0PPsLzaxjsE4/YGww9Otmotfjfyk4svkNcHwwZOxM4JrggYtnAAOC\nbW+NwdcsslsqHCK74e5bgTxgDLAeeMrMLgSONbMPgrHRvwkMqLXZ5OD1I2Ceu68OjliWAj2DZSvc\nveZ5aI8RHQyothFAf+B/ZjaH6NgPBxAtMmXAI2b2PaCkyb5YkQbSOQ6RPXD3KuBN4M2gUPwEGATk\nu/sKM7uJWo9cZ8eD5qprva+Zr/k/V/cGqrrzBkx19688DsfMhhN9Ou85wE+JFi6RuNERh8humFk/\nM+tbq2kwsDB4Xxicd/j+Xnz0/sGJd4g+K21aneXvAyPNrE+QI9PMDg72lx2MSHh1kEckrnTEIbJ7\nbYH7ggcdVgKLiXZbbSLaFbUcmLEXnzsfGG1mDwOLgAdrL3T39UGX2BPBONIQPeexBXjOzNKJHpX8\nfC/2LbJP9MgRkTgzs17A88GJdZFmR11VIiLSKDriEBGRRtERh4iINIoKh4iINIoKh4iINIoKh4iI\nNIoKh4iINIoKh4iINMr/B0xXCor3ytTWAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10a7eac50>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#比较不同文体中的情态动词的用法\n",
    "from nltk.corpus import brown\n",
    "\n",
    "news_text = brown.words(categories='news')\n",
    "fdist = nltk.FreqDist([w.lower() for w in news_text])\n",
    "modals = ['can', 'could', 'may', 'might', 'must', 'will']\n",
    "for m in modals:\n",
    "    print(m + ':', fdist[m],)\n",
    "fdist.plot(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[u'acq',\n u'alum',\n u'barley',\n u'bop',\n u'carcass',\n u'castor-oil',\n u'cocoa',\n u'coconut',\n u'coconut-oil',\n u'coffee',\n u'copper',\n u'copra-cake',\n u'corn',\n u'cotton',\n u'cotton-oil',\n u'cpi',\n u'cpu',\n u'crude',\n u'dfl',\n u'dlr',\n u'dmk',\n u'earn',\n u'fuel',\n u'gas',\n u'gnp',\n u'gold',\n u'grain',\n u'groundnut',\n u'groundnut-oil',\n u'heat',\n u'hog',\n u'housing',\n u'income',\n u'instal-debt',\n u'interest',\n u'ipi',\n u'iron-steel',\n u'jet',\n u'jobs',\n u'l-cattle',\n u'lead',\n u'lei',\n u'lin-oil',\n u'livestock',\n u'lumber',\n u'meal-feed',\n u'money-fx',\n u'money-supply',\n u'naphtha',\n u'nat-gas',\n u'nickel',\n u'nkr',\n u'nzdlr',\n u'oat',\n u'oilseed',\n u'orange',\n u'palladium',\n u'palm-oil',\n u'palmkernel',\n u'pet-chem',\n u'platinum',\n u'potato',\n u'propane',\n u'rand',\n u'rape-oil',\n u'rapeseed',\n u'reserves',\n u'retail',\n u'rice',\n u'rubber',\n u'rye',\n u'ship',\n u'silver',\n u'sorghum',\n u'soy-meal',\n u'soy-oil',\n u'soybean',\n u'strategic-metal',\n u'sugar',\n u'sun-meal',\n u'sun-oil',\n u'sunseed',\n u'tea',\n u'tin',\n u'trade',\n u'veg-oil',\n u'wheat',\n u'wpi',\n u'yen',\n u'zinc']"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "########## 路透社语料库\n",
    "from nltk.corpus import reuters\n",
    "reuters.fileids()\n",
    "reuters.categories()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "'type' object has no attribute '__getitem__'",
     "traceback": [
      "\u001b[0;31m\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0mTraceback (most recent call last)",
      "\u001b[0;32m<ipython-input-15-7cc417a393b4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m cfd = nltk.ConditionalFreqDist(\n\u001b[1;32m      4\u001b[0m     \u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m     \u001b[0;32mfor\u001b[0m \u001b[0mfileid\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minaugural\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfileids\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      6\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mw\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minaugural\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfileid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mtarget\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'america'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'citizen'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/usr/local/var/pyenv/versions/2.7.12/lib/python2.7/site-packages/nltk/probability.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, cond_samples)\u001b[0m\n\u001b[1;32m   1751\u001b[0m         \u001b[0mdefaultdict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mFreqDist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1752\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mcond_samples\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1753\u001b[0;31m             \u001b[0;32mfor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcond\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcond_samples\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1754\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcond\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msample\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1755\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-15-7cc417a393b4>\u001b[0m in \u001b[0;36m<genexpr>\u001b[0;34m((fileid,))\u001b[0m\n\u001b[1;32m      6\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mw\u001b[0m \u001b[0;32min\u001b[0m \u001b[0minaugural\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfileid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      7\u001b[0m     \u001b[0;32mfor\u001b[0m \u001b[0mtarget\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'america'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'citizen'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m     if w.lower().startswith(target))\n\u001b[0m\u001b[1;32m      9\u001b[0m \u001b[0mcfd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: 'type' object has no attribute '__getitem__'"
     ],
     "output_type": "error"
    }
   ],
   "source": [
    "########## 就职演说语料库\n",
    "from nltk.corpus import inaugural\n",
    "cfd = nltk.ConditionalFreqDist(\n",
    "    (target, file[:4])\n",
    "    for fileid in inaugural.fileids()\n",
    "    for w in inaugural.words(fileid)\n",
    "    for target in ['america', 'citizen']\n",
    "    if w.lower().startswith(target))\n",
    "cfd.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<ConditionalFreqDist with 15 conditions>\n"
     ]
    }
   ],
   "source": [
    "######### 按文体计数词汇\n",
    "from nltk.corpus import brown\n",
    "cfd = nltk.ConditionalFreqDist((genre, word)\n",
    "    for genre in brown.categories()\n",
    "    for word in brown.words(categories=genre))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "FreqDist({u'sunbonnet': 1,\n          u'Elevated': 1,\n          u'narcotic': 2,\n          u'four': 73,\n          u'woods': 4,\n          u'railing': 1,\n          u'Until': 5,\n          u'aggression': 1,\n          u'marching': 2,\n          u'increase': 24,\n          u'eligible': 4,\n          u'electricity': 1,\n          u'$25-a-plate': 1,\n          u'wheeled': 2,\n          u'Casey': 6,\n          u'all-county': 1,\n          u'Belgians': 20,\n          u'Western': 7,\n          u'dependency': 2,\n          u'1959-60': 1,\n          u'Duhagon': 1,\n          u'sinking': 1,\n          u'1,119': 1,\n          u'co-operation': 1,\n          u'Famed': 1,\n          u'regional': 2,\n          u'Charitable': 1,\n          u'appropriation': 2,\n          u'yellow': 3,\n          u'Old': 11,\n          u'Heights': 1,\n          u'bringing': 2,\n          u'Policies': 2,\n          u'prize': 5,\n          u'Loen': 1,\n          u'Publique': 2,\n          u'wooden': 1,\n          u'Loeb': 1,\n          u'specialties': 1,\n          u'Sands': 1,\n          u'succession': 1,\n          u'Paul': 6,\n          u'shows': 4,\n          u'commented': 7,\n          u'Screw': 1,\n          u'charter': 15,\n          u'Oslo': 5,\n          u'tired': 3,\n          u'pulse': 1,\n          u'tires': 3,\n          u'271': 1,\n          u'second': 35,\n          u'273': 1,\n          u'Pampa': 2,\n          u'DiVarco': 1,\n          u'errors': 8,\n          u'Initially': 1,\n          u'Lucille': 2,\n          u'boogie': 1,\n          u'contributed': 5,\n          u'Seekonk': 2,\n          u'Hamilton': 2,\n          u'designing': 2,\n          u'College': 20,\n          u'increasing': 2,\n          u'Presidential': 2,\n          u'dispatched': 3,\n          u'hero': 7,\n          u'Sioux': 1,\n          u'Foundation': 3,\n          u'Munoz': 1,\n          u'error': 2,\n          u'here': 67,\n          u'reported': 28,\n          u'affiliated': 1,\n          u'Footnotes': 1,\n          u'Stephanie': 2,\n          u'doldrums': 1,\n          u'cyclical': 1,\n          u'kids': 6,\n          u'Fernberger': 1,\n          u'elaborate': 3,\n          u'climbed': 2,\n          u'reports': 12,\n          u'controversy': 5,\n          u'Boxwood': 1,\n          u'military': 30,\n          u'Walters': 1,\n          u'Isles': 1,\n          u'rebel': 3,\n          u'golden': 5,\n          u'Quincy': 1,\n          u'ground': 10,\n          u'Harvey': 7,\n          u'explained': 13,\n          u'precincts': 4,\n          u'Three': 4,\n          u'replace': 4,\n          u'brought': 21,\n          u'beneficiaries': 1,\n          u'Basic': 1,\n          u'Wales': 2,\n          u'Basin': 2,\n          u'unit': 10,\n          u'opponents': 1,\n          u'Ronald': 2,\n          u'Callan': 1,\n          u'spoke': 4,\n          u'tardiness': 1,\n          u'Slate': 5,\n          u'century': 2,\n          u'Admitting': 1,\n          u'Anticipated': 1,\n          u'occupying': 1,\n          u'Vernon': 4,\n          u'Tex.': 5,\n          u'music': 12,\n          u'therefore': 7,\n          u'passport': 1,\n          u'unfortunately': 1,\n          u'strike': 12,\n          u'heralded': 1,\n          u'until': 28,\n          u'Tudor': 1,\n          u'Stepanovich': 2,\n          u'females': 1,\n          u'Christine': 2,\n          u'successful': 10,\n          u'brings': 1,\n          u'whirling': 1,\n          u'Rule': 2,\n          u'99': 1,\n          u'Person': 2,\n          u'menaced': 1,\n          u'tying': 1,\n          u'90': 2,\n          u'hole': 17,\n          u'hold': 10,\n          u'95': 3,\n          u'circumstances': 5,\n          u'locked': 1,\n          u'brutality': 2,\n          u'Wilderness': 1,\n          u'homemakers': 1,\n          u'famed': 1,\n          u'accomplishment': 1,\n          u'Professors': 2,\n          u'Westphalia': 2,\n          u'temperatures': 1,\n          u'Travelers': 1,\n          u'centralization': 2,\n          u'example': 15,\n          u'Le': 1,\n          u'La': 7,\n          u'household': 3,\n          u'artillery': 2,\n          u'organized': 6,\n          u'Briar': 1,\n          u'Smith-Colmer': 1,\n          u'currency': 1,\n          u'caution': 1,\n          u'reviewing': 2,\n          u'want': 16,\n          u'counseling': 3,\n          u'Easier': 1,\n          u'absolute': 1,\n          u'preferably': 1,\n          u'hog': 2,\n          u'hoc': 1,\n          u'knows': 6,\n          u'complaining': 1,\n          u'travel': 2,\n          u'drying': 2,\n          u'feature': 4,\n          u'Gardner': 1,\n          u'machine': 5,\n          u'how': 37,\n          u'hot': 9,\n          u'significance': 4,\n          u'Comedian': 1,\n          u'Gerosa': 2,\n          u'preferable': 1,\n          u\"He'll\": 2,\n          u'A': 137,\n          u'beauty': 4,\n          u'inherent': 2,\n          u'L.': 25,\n          u'swing': 1,\n          u'outlawed': 1,\n          u'Players': 1,\n          u'modest': 7,\n          u'Reese': 3,\n          u'destined': 1,\n          u'fourteen-team': 1,\n          u'sentencing': 1,\n          u'types': 4,\n          u'ballroom': 4,\n          u'effective': 15,\n          u'down-payments': 1,\n          u'youths': 4,\n          u'romped': 1,\n          u'revolt': 1,\n          u'headquarters': 18,\n          u'Walkers': 1,\n          u'baggage': 2,\n          u'18th': 4,\n          u'4-7/8': 1,\n          u'Another': 7,\n          u'keeps': 3,\n          u'democratic': 1,\n          u'wing': 4,\n          u'wind': 4,\n          u'leisurely': 1,\n          u'Willy': 1,\n          u'senators': 4,\n          u'$840,000': 1,\n          u'welcomed': 2,\n          u'Edith': 1,\n          u'Housing': 3,\n          u'reforms': 4,\n          u'vary': 2,\n          u'kickoff': 2,\n          u'thousands': 7,\n          u'Dussa': 1,\n          u'Toll': 1,\n          u'Ludwig': 1,\n          u'Commies': 1,\n          u'His': 29,\n          u'Hit': 1,\n          u'surviving': 1,\n          u'fit': 3,\n          u'striking': 4,\n          u\"Gardner's\": 4,\n          u'secede': 1,\n          u'survivors': 1,\n          u'Harris': 8,\n          u'Barber': 5,\n          u'Palsy': 1,\n          u'hidden': 1,\n          u'county-wide': 2,\n          u'Sinfonica': 1,\n          u'slate': 1,\n          u'vouchers': 1,\n          u'detachment': 1,\n          u'effects': 2,\n          u'schools': 37,\n          u'P.m.': 1,\n          u'undeveloped': 1,\n          u'silver': 3,\n          u'glutted': 1,\n          u'headboard': 1,\n          u'represents': 2,\n          u'debut': 4,\n          u\"road's\": 3,\n          u'skills': 3,\n          u'McCluskey': 1,\n          u'4-year-old': 1,\n          u'clientele': 1,\n          u'Seidel': 2,\n          u\"employers'\": 1,\n          u'Superior': 5,\n          u'preceded': 3,\n          u'financial': 11,\n          u'reputedly': 1,\n          u'series': 25,\n          u'finger-paint': 1,\n          u'Mongolia': 1,\n          u\"NATO's\": 1,\n          u'3-to-o': 1,\n          u'whiz': 2,\n          u\"we'd\": 1,\n          u'substantially': 1,\n          u'laboratory': 2,\n          u'tricked': 1,\n          u\"House's\": 2,\n          u'whip': 2,\n          u'borne': 1,\n          u'misfortune': 2,\n          u'two-and-a-half-mile': 1,\n          u'ten-concert': 1,\n          u'5847': 1,\n          u'flooded': 1,\n          u'encourage': 3,\n          u'millions': 9,\n          u'Super': 1,\n          u\"Simpson's\": 1,\n          u'sexton': 1,\n          u'foundation': 1,\n          u'inhabitants': 2,\n          u'Word': 1,\n          u\"Nugent's\": 1,\n          u'Extension': 1,\n          u'sellout': 1,\n          u'University': 42,\n          u'Work': 1,\n          u'threatened': 4,\n          u'3-to-3': 1,\n          u'Marcus': 1,\n          u'3-run': 1,\n          u'Jacques': 1,\n          u'sheet': 1,\n          u'estimate': 3,\n          u'alert': 1,\n          u'substantiation': 1,\n          u'cornerstone': 1,\n          u'enormous': 1,\n          u'Hord': 1,\n          u'shelves': 1,\n          u'24-inch': 1,\n          u'shipped': 1,\n          u'musicians': 3,\n          u'speedy': 1,\n          u'coeds': 1,\n          u'Human': 2,\n          u'1970s': 1,\n          u'reserving': 1,\n          u'repealed': 1,\n          u'Palmer': 42,\n          u'hearsay': 1,\n          u'Convair': 1,\n          u\"Al's\": 1,\n          u'Due': 1,\n          u'channels': 1,\n          u'wash': 2,\n          u'$18.9': 1,\n          u'175': 1,\n          u'174': 1,\n          u'173': 1,\n          u'$18.2': 1,\n          u'Steelers': 2,\n          u'basketball': 5,\n          u'service': 37,\n          u'engagement': 4,\n          u'returns': 2,\n          u'needed': 26,\n          u'Simmons': 2,\n          u'master': 5,\n          u'listed': 10,\n          u'Dumont': 2,\n          u'legs': 3,\n          u'bitter': 5,\n          u'ranging': 3,\n          u'listen': 2,\n          u'collapse': 1,\n          u'predictably': 1,\n          u'convention': 2,\n          u'wisdom': 5,\n          u'advisement': 2,\n          u'task': 5,\n          u'ASDIC': 1,\n          u'defaulted': 1,\n          u'Bertoia': 1,\n          u'peril': 1,\n          u'outlay': 2,\n          u'showed': 5,\n          u'elegant': 2,\n          u'Carroll': 2,\n          u'walloped': 1,\n          u'nations': 12,\n          u'project': 26,\n          u'percentages': 1,\n          u'idle': 2,\n          u'Ilona': 1,\n          u'skimmed': 1,\n          u'feeling': 10,\n          u'acquisition': 1,\n          u'Cody': 1,\n          u'Angelo': 3,\n          u'acclaim': 1,\n          u'entail': 1,\n          u'willingness': 2,\n          u'Chicago': 22,\n          u'Woodyard': 1,\n          u'Mullenax': 2,\n          u'spent': 12,\n          u'Mont.': 1,\n          u\"Lanin's\": 1,\n          u'Gursel': 3,\n          u'picks': 1,\n          u'Angels': 1,\n          u'Williams': 14,\n          u'dozen': 5,\n          u'Then': 17,\n          u'person': 9,\n          u'bleachers': 2,\n          u'responsible': 5,\n          u'Myron': 1,\n          u'Band': 1,\n          u'recommended': 9,\n          u'absorbed': 4,\n          u'Minister': 5,\n          u'They': 62,\n          u'season': 43,\n          u'Ask': 1,\n          u'grips': 2,\n          u'Missionary': 1,\n          u'Jones': 22,\n          u'Lynn': 4,\n          u'Wise': 1,\n          u'shall': 5,\n          u'Wish': 1,\n          u'object': 2,\n          u'vexing': 1,\n          u'debonair': 1,\n          u'affirmation': 1,\n          u'mouth': 2,\n          u'letter': 7,\n          u'conceded': 1,\n          u'putout': 1,\n          u'Galveston': 2,\n          u'episode': 2,\n          u'Texans': 7,\n          u'professor': 1,\n          u'camp': 4,\n          u'Bulloch': 2,\n          u'Journal-Bulletin': 1,\n          u'ruthless': 1,\n          u'independents': 1,\n          u'prevention': 2,\n          u'Mansion': 1,\n          u'detriment': 1,\n          u'nineteenth': 1,\n          u'mating': 1,\n          u'purged': 1,\n          u'incomplete': 1,\n          u'marvel': 3,\n          u'saying': 8,\n          u'signatures': 5,\n          u'bomb': 10,\n          u'reactor': 3,\n          u'Symonds': 1,\n          u'U-2': 1,\n          u'Union': 14,\n          u'orchestra': 9,\n          u'meetings': 5,\n          u'Agency': 1,\n          u'parolees': 2,\n          u'nominated': 2,\n          u'undue': 2,\n          u\"Communism's\": 1,\n          u'cooking': 1,\n          u'judgeship': 1,\n          u'Paradise': 2,\n          u'culminates': 1,\n          u'driving': 12,\n          u'Congressional': 3,\n          u'Meyner': 3,\n          u'touches': 2,\n          u'busy': 3,\n          u'clicked': 1,\n          u'Extend': 1,\n          u'695': 1,\n          u'headline': 2,\n          u'menu': 1,\n          u'Moller': 1,\n          u'than': 138,\n          u'Tiao': 1,\n          u'theme': 5,\n          u'touched': 2,\n          u'rich': 5,\n          u'Coliseum': 1,\n          u'submarine-ball': 1,\n          u\"Berlin's\": 1,\n          u'plate': 5,\n          u'D.C.': 3,\n          u'$15': 1,\n          u'Nevertheless': 1,\n          u'television': 13,\n          u\"AID's\": 1,\n          u'pocket': 1,\n          u'Mears': 1,\n          u\"Ruth's\": 7,\n          u'Sports': 4,\n          u'societies': 2,\n          u'Senators': 4,\n          u'greens': 2,\n          u'bloodstream': 1,\n          u'ever': 32,\n          u'Rip': 1,\n          u'Rio': 2,\n          u'three-year': 2,\n          u'flanked': 1,\n          u'release': 3,\n          u\"leader's\": 4,\n          u'U-I': 1,\n          u'respond': 1,\n          u'mandatory': 1,\n          u'disaster': 1,\n          u'fair': 10,\n          u'Bennington': 1,\n          u'transferred': 1,\n          u'pads': 1,\n          u'Brevard': 5,\n          u'glad': 1,\n          u'result': 30,\n          u'fail': 2,\n          u'Ave.': 10,\n          u'resigned': 5,\n          u'best': 29,\n          u'pricking': 1,\n          u\"Braves'\": 1,\n          u'lots': 3,\n          u'Heinkel': 2,\n          u'rings': 2,\n          u\"'20's\": 1,\n          u'injuries': 3,\n          u'224-170': 1,\n          u'pressures': 1,\n          u'score': 11,\n          u'Lockies': 2,\n          u'toolmaker': 1,\n          u'preserve': 4,\n          u'indecisive': 1,\n          u'redistricting': 1,\n          u'never': 38,\n          u\"Meyner's\": 1,\n          u'nationwide': 2,\n          u'nature': 7,\n          u'rolled': 2,\n          u'punted': 1,\n          u'authorizing': 2,\n          u'lefthanders': 1,\n          u'drew': 6,\n          u'extent': 1,\n          u'Bronx': 5,\n          u'Peterson': 4,\n          u'roller': 1,\n          u'Capello': 1,\n          u\"war's\": 1,\n          u'accident': 7,\n          u'met': 8,\n          u'country': 24,\n          u'conclusions': 1,\n          u'demanded': 3,\n          u'Vacancy': 1,\n          u'planned': 9,\n          u'logic': 1,\n          u'federalism': 1,\n          u'argue': 2,\n          u'asked': 34,\n          u'30th': 1,\n          u'Apartment': 1,\n          u'liberal-conservative': 1,\n          u'troublesome': 1,\n          u'25%': 1,\n          u'102': 1,\n          u'month-long': 1,\n          u'250': 2,\n          u'255': 1,\n          u'relearns': 1,\n          u'Clarence': 3,\n          u'reconsideration': 2,\n          u'Sitting': 1,\n          u'union': 21,\n          u'Rizzuto': 1,\n          u'breakoff': 1,\n          u'.': 4030,\n          u'Nischwitz': 3,\n          u'extraction': 1,\n          u'startled': 1,\n          u'stadium': 3,\n          u'Cherry': 3,\n          u'privilege': 3,\n          u'one-week-old': 1,\n          u'Flowers': 2,\n          u'dots': 1,\n          u'Precise': 1,\n          u'life': 17,\n          u'retrospect': 1,\n          u'Tokyo': 1,\n          u'worker': 2,\n          u'allotting': 1,\n          u'Stella': 1,\n          u'1,212,000': 1,\n          u'child': 9,\n          u'worked': 12,\n          u'Gloriana': 3,\n          u'Holmes': 9,\n          u'commerce': 3,\n          u'presidency': 2,\n          u'administrative': 3,\n          u'employ': 2,\n          u'misconstrued': 1,\n          u'1213-15': 1,\n          u'Campbell': 1,\n          u\"Gannon's\": 1,\n          u'Brandt': 5,\n          u'Zurcher': 2,\n          u'played': 19,\n          u'Innumerable': 1,\n          u'conditioned': 2,\n          u'player': 6,\n          u'eighteen': 3,\n          u'London-based': 1,\n          u'Courtney': 1,\n          u'Puerto': 2,\n          u'churchmen': 2,\n          u'doorman': 1,\n          u'specter': 1,\n          u'trusted': 2,\n          u'Phouma': 3,\n          u'damaged': 2,\n          u'Valley': 1,\n          u\"Dresbachs'\": 1,\n          u'things': 10,\n          u'cumulative': 1,\n          u'rebellion': 1,\n          u'Newman': 1,\n          u'socialized': 1,\n          u'300': 6,\n          u'harmony': 1,\n          u'babies': 2,\n          u'pre-school': 1,\n          u'fairly': 3,\n          u'Budapest': 2,\n          u'saluted': 1,\n          u'Maybe': 2,\n          u'torpedoes': 1,\n          u'Angeles': 12,\n          u'photographers': 1,\n          u'Peking': 1,\n          u'Living': 3,\n          u'5-to-2': 1,\n          u'5-to-3': 1,\n          u\"Stevenses'\": 1,\n          u'protected': 1,\n          u'furlough': 1,\n          u'matters': 3,\n          u'vice-president': 2,\n          u'academic': 10,\n          u\"d'etat\": 1,\n          u'telephone': 8,\n          u'echoes': 1,\n          u'corporate': 3,\n          u'Funeral': 5,\n          u'fittest': 1,\n          u'opinions': 3,\n          u'spurred': 1,\n          u'provocation': 2,\n          u'capitol': 1,\n          u'sleeps': 1,\n          u'Subsequent': 1,\n          u'distribute': 2,\n          u'fantastic': 1,\n          u'plight': 1,\n          u'rushing': 5,\n          u'succeeding': 1,\n          u'previous': 11,\n          u'ham': 2,\n          u'duffer': 1,\n          u'Oscar': 2,\n          u'ease': 2,\n          u'Odell': 1,\n          u'had': 279,\n          u'emphasis': 5,\n          u'Leonard': 6,\n          u'Mohammedanism': 1,\n          u'Connecticut': 2,\n          u'collections': 4,\n          u'easy': 4,\n          u'prison': 7,\n          u'has': 300,\n          u'hat': 1,\n          u'Apart': 3,\n          u'municipal': 4,\n          u'mediocre': 1,\n          u'Dawson': 1,\n          u'survival': 2,\n          u'disagreement': 3,\n          u'possible': 28,\n          u\"rocket's\": 2,\n          u'firmer': 3,\n          u'possibly': 3,\n          u'opener': 3,\n          u'birth': 5,\n          u'Missouri': 3,\n          u'clustered': 1,\n          u'pertinent': 1,\n          u'unique': 1,\n          u'$2,170': 1,\n          u'desire': 4,\n          u'county': 26,\n          u\"bridegroom's\": 2,\n          u'seaside': 1,\n          u'misled': 1,\n          u'steps': 8,\n          u'Shrove': 1,\n          u\"court's\": 1,\n          u'Further': 2,\n          u'Pentagon': 2,\n          u\"Louis's\": 1,\n          u'Warren': 13,\n          u'attorney': 17,\n          u'right': 33,\n          u'old': 23,\n          u'crowd': 8,\n          u'$1,000,000,000': 1,\n          u'creed': 3,\n          u'Expressways': 1,\n          u'crown': 2,\n          u'System': 2,\n          u'culpas': 1,\n          u'3,325': 1,\n          u'Conservation': 4,\n          u'glove': 4,\n          u'Noel': 1,\n          u'Between': 1,\n          u'enemies': 1,\n          u'MacDonald': 2,\n          u'for': 943,\n          u'bottom': 1,\n          u'p.m.': 38,\n          u'contributing': 1,\n          u'individuals': 5,\n          u'summoned': 4,\n          u'pondered': 1,\n          u'Celebration': 1,\n          u'Donnelly': 1,\n          u'Instant': 1,\n          u'Calls': 2,\n          u\"ol'\": 1,\n          u'dental': 6,\n          u'6,000': 1,\n          u'shifting': 2,\n          u'defensive': 7,\n          u'losing': 5,\n          u'brokerage': 1,\n          u'manufacturing': 5,\n          u'shaken': 2,\n          u'Macon': 2,\n          u'benches': 1,\n          u'boiling': 1,\n          u'dollars': 15,\n          u'citizens': 6,\n          u'globetrotter': 1,\n          u'despair': 1,\n          u'stoked': 1,\n          u'lacked': 3,\n          u'slightly': 4,\n          u'meddle': 1,\n          u'consulting': 3,\n          u'statements': 9,\n          u'Cal.': 1,\n          u'Blacks': 1,\n          u'honeymoon': 3,\n          u'Scotland': 2,\n          u'son': 22,\n          u'undermining': 1,\n          u'Misses': 1,\n          u'one-fourth': 1,\n          u'raiser': 1,\n          u'raises': 3,\n          u'sow': 1,\n          u'stockholder': 1,\n          u'reducing': 2,\n          u'defendants': 9,\n          u'Hank': 4,\n          u'collectors': 1,\n          u'162': 1,\n          u'support': 24,\n          u'constantly': 3,\n          u'busy-work': 1,\n          u'Hand': 1,\n          u'symphony': 1,\n          u\"boy's\": 2,\n          u'10,000,000': 1,\n          u'resulted': 6,\n          u'call': 14,\n          u'happy': 12,\n          u'offer': 9,\n          u'understandably': 1,\n          u'forming': 2,\n          u'Completing': 1,\n          u'Acres': 2,\n          u'talents': 2,\n          u'understandable': 2,\n          u'incinerator': 1,\n          u'underdeveloped': 1,\n          u'duel': 3,\n          u\"else's\": 1,\n          u'Toni': 1,\n          u'inside': 4,\n          u'goutte': 1,\n          u'Waldorf-Astoria': 2,\n          u'County': 35,\n          u'unanimous': 2,\n          u'Guests': 4,\n          u'Tony': 3,\n          u'Hawksley': 10,\n          u'Enrique': 1,\n          u'panels': 5,\n          u'Weatherford': 2,\n          u'Stallard': 1,\n          u'8,293': 1,\n          u'150': 3,\n          u'juvenile': 5,\n          u'later': 34,\n          u'liberal': 4,\n          u'154': 3,\n          u'Trooper': 1,\n          u'Six': 5,\n          u'proven': 1,\n          u\"''\": 702,\n          u'Virgin': 2,\n          u'Squad': 1,\n          u'exist': 1,\n          u'Pittsboro': 1,\n          u'Sid': 1,\n          u'segregationist': 1,\n          u'acacia': 3,\n          u'dealer': 4,\n          u'negotiations': 11,\n          u'Knoll': 1,\n          u'McDaniel': 2,\n          u'college': 18,\n          u'protested': 1,\n          u'Noting': 2,\n          u'Practice': 1,\n          u'eventual': 3,\n          u'floor': 14,\n          u'Track': 1,\n          u'Possible': 1,\n          u'crowns': 1,\n          u'flood': 1,\n          u'Nolan': 1,\n          u'republic': 1,\n          u'amicable': 1,\n          u'ambitious': 1,\n          u'entomologist': 1,\n          u'Norristown': 1,\n          u'smell': 2,\n          u'roll': 4,\n          u'steamship': 1,\n          u'intend': 3,\n          u'Lenny': 1,\n          u'models': 2,\n          u'high-wage': 1,\n          u'Western-style': 1,\n          u'Luthuli': 1,\n          u'eminent': 1,\n          u'scale': 1,\n          u'smelling': 1,\n          u'persecution': 1,\n          u'source': 7,\n          u'Charley': 4,\n          u'fastened': 1,\n          u'Mostly': 1,\n          u'Debutante': 2,\n          u\"workers'\": 1,\n          u'Charles': 22,\n          u'Quaker': 1,\n          u\"O'Hare\": 1,\n          u'Fifth': 1,\n          u'time': 97,\n          u'push': 2,\n          u'conferred': 1,\n          u'Empire': 2,\n          u'Principal': 2,\n          u'gown': 6,\n          u'smelts': 1,\n          u'chain': 2,\n          u'criteria': 2,\n          u'Indians': 4,\n          u'Nicklaus': 1,\n          u'integration': 6,\n          u'tee': 4,\n          u'theaters': 6,\n          u'645-acre': 1,\n          u'Wabash': 1,\n          u'Indiana': 2,\n          u'chair': 1,\n          u'$278,877,000': 1,\n          u'Beyeler': 2,\n          u'ballet': 7,\n          u'92': 1,\n          u'8861': 1,\n          u'900-student': 1,\n          u'sweat-suits': 1,\n          u'shouldda': 1,\n          u'carpenters': 1,\n          u'Bahi': 1,\n          u'96': 1,\n          u'verbally': 1,\n          u'recipient': 3,\n          u'Prize': 1,\n          u'Political': 1,\n          u\"Howsam's\": 1,\n          u'choice': 6,\n          u'Lyle': 1,\n          u'alcoholics': 2,\n          u'mourn': 1,\n          u'stays': 1,\n          u'southpaw': 5,\n          u'right-handed': 1,\n          u'exact': 1,\n          u'minute': 1,\n          u'Tau': 1,\n          u'1.10.8': 1,\n          u'3-month': 1,\n          u'Fifteen': 1,\n          u'1.10.4': 1,\n          u'rights': 3,\n          u'Tax': 5,\n          u'make': 43,\n          u'1.10.1': 1,\n          u'leave': 5,\n          u'solved': 2,\n          u'depositors': 1,\n          u'settle': 1,\n          u'team': 33,\n          u'Patience': 1,\n          u'prevent': 12,\n          u'spiritual': 1,\n          u'$80,738': 2,\n          u'M.': 22,\n          u'prediction': 1,\n          u'sign': 9,\n          u'Bldg.': 1,\n          u'3505o': 1,\n          u'ogled': 1,\n          u'Lt.': 1,\n          u'Look': 3,\n          u'Associations': 1,\n          u'Adamson': 1,\n          u'jeopardy': 1,\n          u'celebrated': 2,\n          u'locker': 3,\n          u'melt': 1,\n          u'current': 13,\n          u'wayward': 1,\n          u\"Tuttle's\": 1,\n          u'Southwest': 2,\n          u'boost': 6,\n          u'Lopez': 1,\n          u'Me': 2,\n          u'drafted': 1,\n          u'jury': 44,\n          u'funeral': 2,\n          u'understanding': 5,\n          u\"Leopold's\": 1,\n          u'yards': 22,\n          u'address': 12,\n          u'alone': 8,\n          u'along': 34,\n          u'$80': 1,\n          u'My': 9,\n          u'Godwin': 1,\n          u'nitroglycerine': 1,\n          u'passengers': 3,\n          u'revenues': 13,\n          u'Associated': 2,\n          u'Cornell': 1,\n          u'transition': 3,\n          u'brilliant': 3,\n          u'saws': 2,\n          u'studied': 7,\n          u'wherever': 1,\n          u'Casals': 1,\n          u'accomplished': 4,\n          u'studies': 2,\n          u'influx': 1,\n          u'tasks': 2,\n          u'love': 3,\n          u'Hagner': 1,\n          u'Thornton': 1,\n          u'prefer': 3,\n          u\"Leavitt's\": 1,\n          u'jolt': 1,\n          u'Lisle': 1,\n          u'redevelopers': 1,\n          u'Davidson': 1,\n          u'opposes': 2,\n          u'cocktail': 6,\n          u'August': 12,\n          u'working': 16,\n          u'Sarasota': 1,\n          u'positive': 2,\n          u'angry': 3,\n          u'tightly': 1,\n          u'Ghormley': 1,\n          u'cherished': 1,\n          u'wood': 1,\n          u'opposed': 9,\n          u'films': 3,\n          u'scope': 1,\n          u'Pinsk': 1,\n          u'Those': 8,\n          u'loving': 1,\n          u'``': 732,\n          u'Klaus': 1,\n          u'afford': 4,\n          u'subsistence': 1,\n          u'apparent': 7,\n          u'validity': 1,\n          u'Jimmy': 4,\n          u'virtue': 2,\n          u'Achaeans': 1,\n          u'scratches': 3,\n          u'Retail': 1,\n          u'Opelika': 1,\n          ...})"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## 让我们拆开来看，只看两个文体:新闻和言情\n",
    "genre_word = [(genre, word)\n",
    "    for genre in ['news', 'romance']\n",
    "    for word in brown.words(categories=genre)]\n",
    "len(genre_word)\n",
    "genre_word[:4]\n",
    "cfd = nltk.ConditionalFreqDist(genre_word)\n",
    "cfd.conditions()\n",
    "#让我们访问这两个条件，它们每一个都只是一个频率分布:\n",
    "cfd['news']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZQAAAEGCAYAAABCa2PoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHh5JREFUeJzt3Xt4VfWd7/H3R24RVBSVTjBVYLxUEXBM8FotVgXstCJe\nzih9jrQV6dTR2vbU29HxgnYe7dixTu2xYrF1Ol6qnmrh1BteYltbuURRRLxQSmtqqyKKGFCIfM8f\nayVu4k6y2Vl772z9vJ4nT9b+rbX275ME8s1vXX5LEYGZmVlPbVXpAGZm9tHggmJmZplwQTEzs0y4\noJiZWSZcUMzMLBMuKGZmlgkXFDMzy4QLipmZZcIFxczMMtG30gHKaaeddorhw4cXte/69evZeuut\nsw3kHFWfwTmco7dnyCJHU1PTqojYudsNI+Jj81FfXx/FWrRoUdH7Zsk5eleGCOfoyDl6V4aInucA\nFkUBv2N9yMvMzDLhgmJmZplwQTEzs0y4oJiZWSZcUMzMLBMuKGZmlgkXFDMzy4QLipmZZcIFxczM\nMuGCYmZmmXBBMTOzTLigmJlZJlxQzMwsEy4oZmaWCRcUMzPLhAuKmZllwgXFzMwy4YJiZmaZcEEx\nM7NMuKCYmVkmXFDMzCwTLihmZpYJFxQzM8uEC4qZmWWiogVF0iRJL0haLun8POsHSPp5un6+pOEd\n1u8q6R1J3y5XZjMzy69iBUVSH+CHwDHAPsApkvbpsNlpwJsRsTtwDXBVh/XXAPeVOquZmXWvkiOU\nA4DlEbEiIjYAtwOTO2wzGbg5Xb4LOFKSACQdB6wAlpYpr5mZdaGSBWUX4OWc181pW95tIqIVWAPs\nKGkQcB5wWRlymplZAfpWsG/laYsCt7kMuCYi3kkHLJ13Is0AZgDU1tbS1NRURFRYt25d0ftmyTl6\nVwbncI7enqGsOSKiIh/AwcADOa8vAC7osM0DwMHpcl9gFUmR+Q2wMv14C1gNnNldn/X19VGsRYsW\nFb1vlpyjd2WIcI6OnKN3ZYjoeQ5gURTwe72SI5SFwB6SRgB/AU4GpnbYZg4wDfg9cCLwSPrFHda2\ngaRLgXci4rpyhDYzs/wqVlAiolXSmSSjkD7ATRGxVNJMkmo4B5gN/EzScpJRyMmVymtmZl2r5AiF\niLgXuLdD28U5y+8CJ3XzHpeWJJyZmW0R3ylvZmaZcEExM7NMuKCYmVkmXFDMzCwTLihmZpYJFxQz\nM8uEC4qZmWXCBcXMzDLhgmJmZplwQTEzs0y4oJiZWSZcUMzMLBMuKGZmlgkXFDMzy4QLipmZZcIF\nxczMMuGCYmZmmXBBMTOzTLigmJlZJlxQzMwsEy4oZmaWCRcUMzPLhAuKmZllwgXFzMwy4YJiZmaZ\ncEExM7NMuKCYmVkmXFDMzCwT3RYUSYMkbZUu7ynpWEn9Sh/NzMyqSSEjlF8DNZJ2AR4Gvgz8tJSh\nzMys+hRSUBQR64DjgR9ExBRgn9LGMjOzalNQQZF0MPBF4FdpW98sOpc0SdILkpZLOj/P+gGSfp6u\nny9peNp+tKQmSUvSz5/NIo+ZmRWvkIJyNnABcHdELJU0Eni0px1L6gP8EDiGZMRziqSOI5/TgDcj\nYnfgGuCqtH0V8IWIGA1MA37W0zxmZtYzhYw0PhERx7a9iIgVkn6TQd8HAMsjYgWApNuBycBzOdtM\nBi5Nl+8CrpOkiHgqZ5ulJOd4BkTEexnkMjOzIigiut5AejIi9u+ubYs7lk4EJkXE9PT1/wQOjIgz\nc7Z5Nt2mOX39h3SbVR3e558j4qhO+pkBzACora2tnzt3blF5161bx8CBA4vaN0vO0bsyOIdz9PYM\nWeRoaGhoioiG7rbrdIQi6Rjgc8Aukv4zZ9V2QGvRyXK6yNPWsbp1uY2kUSSHwSZ01klEzAJmATQ0\nNER9ff2WJwWampoodt8sOUfvyuAcztHbM5QzR1eHvF4BFgHHAk057WuBb2bQdzPwyZzXdWmf+bZp\nltQXGAysBpBUB9wNnBoRf8ggj5mZ9UCnBSUingaelnRrRGwsQd8LgT0kjQD+ApwMTO2wzRySk+6/\nB04EHomIkLQ9yRVnF0TE4yXIZmZmW6iQq7wOkDRP0ouSVkj6o6QVPe04IlqBM4EHgGXAHelVZDMl\ntV0EMBvYUdJy4FtA26XFZwK7A/8qaXH6MbSnmczMrHiFXOU1m+QQVxPwfpadR8S9wL0d2i7OWX4X\nOCnPflcAV2SZxczMeqaQgrImIu4reRIzM6tqhRSURyX9O/ALoP0+j4h4smSpzMys6hRSUA5MP+de\ngxyApzsxM7N23RaUiDiiHEHMzKy6dVtQJF2crz0iZmYfx8zMqlUhh7xacpZrgM+TXOZrZmbWrpBD\nXt/LfS3papIbDs3MzNoV80z5gcDIrIOYmVl1K+QcyhI+mJCxD7Az4PMnZma2mULOoXw+Z7kVeDWd\nNsXMzKxdt4e8IuJPwPbAFwA/T97MzPLqtqBIOhu4BRiaftwi6axSBzMzs+pSyCGv00iektgCIOkq\nkunkf1DKYGZmVl0KucpLbD7L8Pvkf5KimZl9jBUyQvkJMF/S3enr40imtDczM2tXyI2N/yGpEfg0\nycjkyxHxVKmDmZlZdem0oEgaB+wUEfelU9U/mbYfK2mriGjqbF8zM/v46eocyr+Tf86u59J1ZmZm\n7boqKDtGxMqOjRGxHNixZInMzKwqdVVQtu5i3aCsg5iZWXXrqqA8JOk7kja7RFjSZcAjpY1lZmbV\npqurvP4X8GNguaTFadtYYBEwvdTBzMysunRaUNI740+RNBIYlTYvjYgVZUlmZmZVpZD7UFYALiJm\nZtalYh6wZWZm9iEuKGZmlomCCoqkT0v6crq8s6QRpY1lZmbVppDnoVwCnAdckDb1A/67lKHMzKz6\nFDJCmQIcC7QARMQrwLalDGVmZtWnkIKyISICCABJvkvezMw+pJCCcoekG4DtJZ0OPATcmEXnkiZJ\nekHScknn51k/QNLP0/XzJQ3PWXdB2v6CpIlZ5DEzs+IVch/K1ZKOBt4G9gIujoh5Pe1YUh/gh8DR\nQDOwUNKciHguZ7PTgDcjYndJJwNXAf8kaR/gZJIbLoeRTBOzZ0S8j5mZVUS3BUXSN4E7sygiHRwA\nLG+7817S7cBkkunx20wGLk2X7wKuS+cWmwzcHhHvAX+UtDx9v99nnNHMzApUyCOAtwMekLQauB24\nKyJezaDvXYCXc143Awd2tk1EtEpaQzJ1/i7AEx323SWDTPldOph6gLkl66FgztG7MoBzdOQcvSsD\npDnq15S8n0IOeV0GXCZpDPBPwGOSmiPiqB72rTxtUeA2heybvIE0A5gBUFtbS1PTlj9osn6L9zAz\n612K+d23pQoZobR5Dfgb8AYwNIO+m4FP5ryuA17pZJtmSX2BwcDqAvcFICJmAbMAGhoaor6+iPJQ\nv4ampiaK2jdjztG7MjiHc/T2DOXMUciNjV+T1Ag8DOwEnB4RYzLoeyGwh6QRkvqTnGSf02GbOcC0\ndPlE4JH0EuY5wMnpVWAjgD2ABRlkMjOzIhUyQtkN+EZELO52yy2QnhM5E3gA6APcFBFLJc0EFkXE\nHGA28LP0pPtqkqJDut0dJCfwW4F/8RVeZmaV1WlBkbRdRLwNfDd9PSR3fUSs7mnnEXEvcG+Htotz\nlt8FTupk3+8A3+lpBjMzy0ZXI5Rbgc8DTXz4RHgAI0uYy8zMqkxXT2z8fPrZMwubmVm3Cjkp/3Ah\nbWZm9vHW1TmUGmAgsJOkHfjgkNd2JNOdmJmZtevqHMpXgW+QFI8mPigob5PMwWVmZtauq3Mo1wLX\nSjorIn5QxkxmZlaFCpl65QeS9gX2AWpy2v+rlMHMzKy6FDLb8CXAeJKCci9wDPBbwAXFzMzaFfKA\nrROBI4G/RcSXgbHAgJKmMjOzqlNIQVkfEZuAVknbkUwS6ZsazcxsM4XM5bVI0vYkj/1tAt7BEzGa\nmVkHhZyUPyNd/JGk+4HtIuKZ0sYyM7Nq09WNjft3tS4inixNJDMzq0ZdjVC+18W6AD6bcRYzM6ti\nXd3YeEQ5g5iZWXUr5D6UU/O1+8ZGMzPLVchVXuNylmtI7kl5Et/YaGZmOQq5yuus3NeSBgM/K1ki\nMzOrSoXc2NjROmCPrIOYmVl1K+QcylySq7ogKUD7AHeUMpSZmVWfQs6hXJ2z3Ar8KSKaS5THzMyq\nVCHnUB4DSOfx6psuD4mI1SXOZmZmVaSQQ14zgMuB9cAmkic3Bp4g0szMchRyyOscYFRErCp1GDMz\nq16FXOX1B5Iru8zMzDpVyAjlAuB3kuYD77U1RsTXS5bKzMyqTiEF5QbgEWAJyTkUMzOzDymkoLRG\nxLdKnsTMzKpaIedQHpU0Q1KtpCFtHyVPZmZmVaWQEcrU9PMFOW2+bNjMzDZTyI2NI8oRxMzMqltF\nnoeSHjL7OTAcWAn8j4h4M89204CL0pdXRMTNkgYCdwJ/D7wPzI2I84vNYmZm2SjkHMq4nI/DgEuB\nY3vY7/nAwxGxB/Bw+nozadG5BDgQOAC4RNIO6eqrI+JTwD8Ah0o6pod5zMyshyr1PJTJwPh0+Wag\nETivwzYTgXltc4ZJmgdMiojbgEfTbBskPQnU9TCPmZn1kCKi+61yd5D6Ac9ExN5Fdyq9FRHb57x+\nMyJ26LDNt4GaiLgiff2vwPqIuDpnm+1Jnh55VESs6KSvGcAMgNra2vq5c+cWlXndunUMHDiwqH2z\n5By9K4NzOEdvz5BFjoaGhqaIaOhuu5I9D0XSQ8Df5Vl1YXf7tr1Fnrb26iepL3Ab8J+dFROAiJgF\nzAJoaGiI+vr6ArvfXFNTE8XumyXn6F0ZnMM5enuGcuYo2fNQIuKoztZJelVSbUT8VVIt8FqezZr5\n4LAYJIe1GnNezwJeiojvd5fFzMxKr9OCIml34BNtz0PJaT9M0oCI+EMP+p0DTAOuTD//Ms82DwD/\nlnMifgLpvTCSrgAGA9N7kMHMzDLU1VVe3wfW5mlfn67riSuBoyW9BBydvkZSg6QfA6Qn4y8HFqYf\nMyNitaQ6ksNm+wBPSlosyYXFzKzCujrkNTwinunYGBGLJA3vSacR8QZwZL73JmfUERE3ATd12KaZ\n/OdXzMysgroaodR0sW7rrIOYmVl166qgLJR0esdGSacBTaWLZGZm1airQ17fAO6W9EU+KCANQH9g\nSqmDmZlZdem0oETEq8Ahko4A9k2bfxURj5QlmZmZVZVCpl55lHSqEzMzs84UMjmkmZlZt1xQzMws\nEy4oZmaWCRcUMzPLhAuKmZllwgXFzMwy4YJiZmaZcEExM7NMuKCYmVkmXFDMzCwTLihmZpYJFxQz\nM8uEC4qZmWXCBcXMzDLhgmJmZplwQTEzs0y4oJiZWSZcUMzMLBMuKGZmlgkXFDMzy0TfSgcwMyuV\njRs30tzczLvvvluR/vv27cuyZcsq0ncxOWpqaqirq6Nfv37F9VPUXmZmVaC5uZltt92W4cOHI6ns\n/be0tDBo0KCy91tMjojgjTfeoLm5mREjRhTVjw95mdlH1rvvvsuOO+5YkWJSbSSx44479mg054Ji\nZh9pLiaF6+n3ygXFzMwyUZGCImmIpHmSXko/79DJdtPSbV6SNC3P+jmSni19YjOz3u2QQw6pdISK\njVDOBx6OiD2Ah9PXm5E0BLgEOBA4ALgkt/BIOh54pzxxzcx6p/fffx+A3/3udxVOUrmCMhm4OV2+\nGTguzzYTgXkRsToi3gTmAZMAJG0DfAu4ogxZzcx65LjjjqO+vp5Ro0Yxa9YsALbZZhvOO+886uvr\nOeqoo1iwYAHjx49n5MiRzJkzB0iKxTnnnMO4ceMYM2YMN9xwAwCNjY0cccQRTJ06ldGjR7e/X5vv\nfve7jB49mrFjx3L++cnf6zfeeCPjxo1j7NixnHDCCaxbty7zr7NSlw1/IiL+ChARf5U0NM82uwAv\n57xuTtsALge+B2T/HTGzj6Th5/+qJO+78sp/7Habm266iSFDhrB+/XrGjRvHCSecQEtLC+PHj+eq\nq65iypQpXHTRRcybN4/nnnuOadOmceyxxzJ79mwGDx7MwoULee+99zj00EOZMGECAAsWLODZZ5/9\n0CW+9913H/fccw/z589n4MCBrF69GoDjjz+e008/HYCLLrqI2bNnc9ZZZ2X6vShZQZH0EPB3eVZd\nWOhb5GkLSfsBu0fENyUNLyDHDGAGQG1tLU1NTQV2v7l169YVvW+WnKN3ZXCO3p2jb9++tLS0lLSf\nrt5/06ZNtLS0cPXVVzN37lwAXn75ZZ555hn69+/PYYcdRktLC3vttRcDBgxgw4YNjBw5kpUrV9LS\n0sK9997L0qVLueOOOwB4++23WbJkCf369aO+vp6hQ4du1n9LSwv33XcfU6dOJSJoaWlhwIABbNq0\niYULFzJz5kzWrFlDS0sLRx55ZN7sGzZsKPrnV7KCEhFHdbZO0quSatPRSS3wWp7NmoHxOa/rgEbg\nYKBe0kqS/EMlNUbEePKIiFnALICGhoaor6/f8i8GaGpqoth9s+QcvSuDc/TuHP3792+/oa+QkUTW\nWlpaWLhwIb/+9a/bRwzjx49HEv369Ws/TFVTU8M222zTnrW1tZVBgwbRp08frrvuOiZOnLjZ+zY2\nNrLddtt96GbFQYMG0bdvX2pqajZb19LSwte+9jXuuecexo4dy09/+lMaGxvz3uzYv39/xo4dW9TX\nW6lzKHOAtqu2pgG/zLPNA8AESTukJ+MnAA9ExPURMSwihgOfBl7srJiYmVXamjVr2GGHHRg4cCDP\nP/88TzzxRMH7Tpw4keuvv56NGzcC8OKLL3Y74powYQI33XRT+zmStkNea9eupba2lo0bN3LLLbcU\n+dV0rVLnUK4E7pB0GvBn4CQASQ3AP0fE9IhYLelyYGG6z8yIWF2ZuGZmxZk0aRI/+tGPGDNmDHvt\ntRcHHXRQwftOnz6dlStXsv/++xMR7Lzzztxzzz3d9rd48WIaGhro378/n/vc57jwwgu5/PLLOfDA\nA9ltt90YPXo0a9eu7emX9iGKiMzftLdqaGiIRYsWFbVvbxrGO0fvyeAcvTvHwIED2XvvvSuWoZrm\n8mqzbNmyD33PJDVFREN3+/pOeTMzy4QLipmZZcIFxczMMuGCYmZmmXBBMTOzTLigmJlZJlxQzMxK\n6Ctf+QpDhw5l3333bW9bvHgxBx10EPvttx8NDQ0sWLCgfV1jYyP77bcfo0aN4jOf+Ux7+/Dhwxk9\nenT7Pr2RC4qZWQl96Utf4v7779+s7dxzz+WSSy5h8eLFzJw5k3PPPReAt956izPOOIM5c+awdOlS\n7rzzzs32e/TRR1m8eDHF3k9Xai4oZmYldPjhhzNkyJDN2iTx9ttvA8nULMOGDQPg1ltv5fjjj2fX\nXXcFYOjQfBOx916VmnrFzKy8Lh1covdds8W7fP/732fixIl8+9vfZtOmTe0Px3rxxRfZuHEj48eP\nZ+3atZx99tmceuqpQFKEJkyYgCS++tWvMmPGjEy/jCy4oJiZldn111/PNddcwwknnMAdd9zBaaed\nxkMPPURraytNTU08/PDDrF+/noMPPpiDDjqIPffck8cff5xhw4bx2muvcfTRR/OpT32Kww8/vNJf\nymZcUMzs46GIkUSp3HzzzVx77bUAnHTSSUyfPh2Auro6dtppJwYNGsSgQYM4/PDDefrpp9lzzz3b\nD4sNHTqUKVOmsGDBgl5XUHwOxcyszIYNG8Zjjz0GwCOPPMIee+wBwOTJk/nNb35Da2sr69atY/78\n+ey99960tLS0zw7c0tLCgw8+uNlVY72FRyhmZiV0yimn0NjYyKpVq6irq+Oyyy7jxhtv5Oyzz6a1\ntZWampr258zvvffeTJo0iTFjxrDVVlsxffp09t13X1asWMGUKVOA5OFbU6dOZdKkSZX8svJyQTEz\nK6Hbbrstb3tnj9k955xzOOecczZrGzlyJE8//XTm2bLmQ15mZpYJFxQzM8uEC4qZfaR9nJ5K21M9\n/V65oJjZR1ZNTQ1vvPGGi0oBIoI33niDmpqaot/DJ+XN7COrrq6O5uZmXn/99Yr0v2HDBvr371+R\nvovJUVNTQ11dXdH9uKCY2UdWv379GDFiRMX6b2pqYuzYsRXrv9w5fMjLzMwy4YJiZmaZcEExM7NM\n6ON09YOk14E/Fbn7TsCqDOMUyzl6VwZwjo6co3dlgJ7n2C0idu5uo49VQekJSYsiouLP3XSO3pXB\nOZyjt2coZw4f8jIzs0y4oJiZWSZcUAo3q9IBUs7xgd6QAZyjI+f4QG/IAGXK4XMoZmaWCY9QzMws\nEy4oZmaWCRcUMzPLhCeH7ISkTwGTgV2AAF4B5kTEsooG+xiTdAAQEbFQ0j7AJOD5iLi3wrn+KyJO\nrWQGqzxJ/YGTgVci4iFJU4FDgGXArIjYWNGAZeCT8nlIOg84BbgdaE6b60j+sdweEVdWKlslpMV1\nF2B+RLyT0z4pIu4vU4ZLgGNI/giaBxwINAJHAQ9ExHfKlGNOxybgCOARgIg4thw5OpL0aeAA4NmI\neLCM/R4ILIuItyVtDZwP7A88B/xbRKwpQ4avA3dHxMul7qubHLeQ/PscCLwFbAP8AjiS5HfttDJm\n+XtgCvBJoBV4Cbit1D8PF5Q8JL0IjOr4F0X6F8jSiNijMsk2y/LliPhJGfr5OvAvJH9l7QecHRG/\nTNc9GRH7lzpD2teStP8BwN+AupxfYvMjYkyZcjxJ8svyxyQjVwG3kfyxQUQ8VqYcCyLigHT5dJKf\n0d3ABGBuuf7okbQUGBsRrZJmAeuAu0h+iY6NiOPLkGEN0AL8geRncWdElP0BKJKeiYgxkvoCfwGG\nRcT7kgQ8XcZ/o18HvgA8BnwOWAy8SVJgzoiIxpJ1HhH+6PABPE8yd03H9t2AFyqdL83y5zL1swTY\nJl0eDiwiKSoAT5Xx630q33L6enEZc2wFfJNklLRf2raiAj//3O/HQmDndHkQsKSMOZblLD9ZiZ8L\n8FT6c5kAzAZeB+4HpgHblvF78SzQH9gBWAsMSdtrcr9PZcixBOiTLg8EGtPlXUv9f9bnUPL7BvCw\npJeAtmH0rsDuwJnlCiHpmc5WAZ8oU4w+kR7mioiVksYDd0naLc1RLhskDYyIdUB9W6OkwcCmcoWI\niE3ANZLuTD+/SmXORW4laQeSX6SK9C/yiGiR1FrGHM/mjJafltQQEYsk7QmU65xBpD+XB4EHJfUj\nOTx6CnA10O2khhmZTfLHaB/gQuBOSSuAg0gOn5dTX+B9khH9tgAR8ef0e1MyPuTVCUlbkRyT3oXk\nF2czsDAi3i9jhleBiSTD1c1WAb+LiGFlyPAI8K2IWJzT1he4CfhiRPQpdYa0zwER8V6e9p2A2ohY\nUo4cefr/R+DQiPjfZe53JUkhFcmht0Mi4m+StgF+GxH7lSnHYOBa4DCS2Wz3J/kj7GXg6xHxdBky\nPBUR/9DJuq0jYn2pM+T0NwwgIl6RtD3JOb4/R8SCMmY4GzgNeAI4HLgqIn4iaWfg/0bE4SXr2wWl\n95I0G/hJRPw2z7pbI2JqGTLUAa0R8bc86w6NiMdLncEKJ2kg8ImI+GOZ+90WGEnyl3FzRLxaxr73\njIgXy9VfNZA0Ctib5CKN58vWrwuKmZllwTc2mplZJlxQzMwsEy4oZkWQdKGkpZKekbQ4vcGvVH01\nSqr4U//MuuPLhs22kKSDgc8D+0fEe+mVZv0rHMus4jxCMdtytcCqtsuYI2JVepnoxZIWSnpW0qz0\nDum2EcY1kn4taZmkcZJ+IeklSVek2wyX9Lykm9NRz13pFVubkTRB0u8lPSnpzvQyYSRdKem5dN+r\ny/i9MGvngmK25R4EPinpRUn/R9Jn0vbrImJcROwLbE0yimmzIb3+/0fAL0mmStkX+JKkHdNt9iKZ\nRHAM8DZwRm6n6UjoIuCoSKa8WQR8S9IQkmk1RqX7XlGCr9msWy4oZlsonTmgHphBMs3HzyV9CThC\n0vx03rHPAqNydmubVHIJyXxwf01HOCtIJvADeDnnvp7/Bj7doeuDgH2AxyUtJplaZDeS4vMu8GNJ\nx5PMp2VWdj6HYlaEdMaERqAxLSBfBcYADRHxsqRLSeZwatN2l/+mnOW2123/DzveFNbxtYB5EXFK\nxzzp1P5HkkxSeSZJQTMrK49QzLaQpL0k5c44vR/wQrq8Kj2vcWIRb71resIfknmoOs6Q8ARwqKTd\n0xwDJe2Z9jc4kufCfCPNY1Z2HqGYbbltgB+kczW1AstJDn+9RXJIayXJDMBbahkwTdINJM+vuD53\nZUS8nh5au03SgLT5IpKZbX8pqYZkFPPNIvo26zFPvWLWC0gaDvy/9IS+WVXyIS8zM8uERyhmZpYJ\nj1DMzCwTLihmZpYJFxQzM8uEC4qZmWXCBcXMzDLx/wEzSA8drBOTJgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10d798fd0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "## 绘制分布图和分布表\n",
    "\n",
    "from nltk.corpus import inaugural\n",
    "cfd = nltk.ConditionalFreqDist(\n",
    "    (target, fileid[:4])\n",
    "    for fileid in inaugural.fileids()\n",
    "    for w in inaugural.words(fileid)\n",
    "    for target in ['america', 'citizen']\n",
    "    if w.lower().startswith(target))\n",
    "\n",
    "cfd.plot(conditions=['america', '1865'], samples=range(10), cumulative=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('In', 'the')\n('the', 'beginning')\n('beginning', 'God')\n('God', 'created')\n('created', 'the')\n('the', 'heaven')\n('heaven', 'and')\n('and', 'the')\n('the', 'earth')\n('earth', '.')\n"
     ]
    }
   ],
   "source": [
    "## 使用双连词生成随机文本 \n",
    "sent = ['In', 'the', 'beginning', 'God', 'created', 'the', 'heaven',\n",
    "        'and', 'the', 'earth', '.']\n",
    "for item in nltk.bigrams(sent):\n",
    "    print(item)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "living creature that he said , and the land of the land of the land\n"
     ]
    }
   ],
   "source": [
    "## 例 2-1. 产生随机文 :此程序获得《创世记》文 中所有的双连词，\n",
    "# 然后构造一个条件频率分 布来记录哪些词汇最有可能跟在给定词的后面\n",
    "def generate_model(cfdist, word, num=15): \n",
    "    for i in range(num):\n",
    "        print word,\n",
    "        word = cfdist[word].max()\n",
    "text = nltk.corpus.genesis.words('english-kjv.txt') \n",
    "bigrams = nltk.bigrams(text)\n",
    "cfd = nltk.ConditionalFreqDist(bigrams)\n",
    "cfd['living'].items()\n",
    "generate_model(cfd, 'living')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 表 2-4. NLTK 中的条件频率分布:定义、访问和可视化一个计数的条件频率分布的常用方法和习 惯用法\n",
    "\n",
    "* fdist= ConditionalFreqDist(pairs) 从配对链表中创建条件频率分布\n",
    "* fdist.conditions() 将条件按字母排序\n",
    "* fdist[condition] 此条件下的频率分布\n",
    "* fdist[condition][sample] 此条件下给定样 的频率\n",
    "* fdist.tabulate() 为条件频率分布制表\n",
    "* fdist.tabulate(samples, conditions) 指定样 和条件限制下制表\n",
    "* fdist.plot() 为条件频率分布绘图\n",
    "* fdist.plot(samples, conditions) 指定样 和条件限制下绘图\n",
    "* fdist1 < cfdist2 测试样 在cfdist1中出现次数是否小于在cfdist2中出现次 数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[u'abbeyland', u'abhorred', u'abilities', u'abounded', u'abridgement', u'abused', u'abuses', u'accents', u'accepting', u'accommodations', u'accompanied', u'accounted', u'accounts', u'accustomary', u'aches', u'acknowledging', u'acknowledgment', u'acknowledgments', u'acquaintances', u'acquiesced', u'acquitted', u'acquitting', u'acted', u'actions', u'adapted', u'adding', u'additions', u'addressed', u'addresses', u'addressing', u'adhering', u'adieus', u'adjusting', u'administering', u'admirers', u'admires', u'admitting', u'adorned', u'advances', u'advantages', u'affairs', u'affections', u'affects', u'affixed', u'afflictions', u'afforded', u'affording', u'ages', u'agitated', u'agonies', u'ailments', u'aimed', u'alarms', u'alienated', u'alighted', u'alleged', u'allenham', u'allowances', u'allowed', u'allowing', u'alluded', u'alterations', u'altered', u'altering', u'amended', u'amounted', u'amusements', u'ankles', u'annamaria', u'annexed', u'announced', u'announcing', u'annuities', u'annum', u'answered', u'answering', u'answers', u'anticipated', u'anticipating', u'anticipations', u'anymore', u'apartments', u'apologies', u'apologising', u'apologized', u'appearances', u'appeared', u'appearing', u'appeased', u'appetites', u'applauded', u'applying', u'appointed', u'apprehended', u'apprehensions', u'approached', u'approved', u'arbour', u'ardour', u'arguments', u'arranged', u'arrangements', u'arranging', u'arrived', u'arrives', u'arriving', u'ascended', u'ascertained', u'asked', u'asking', u'assembled', u'assemblies', u'asserted', u'assertions', u'assiduities', u'assisted', u'assisting', u'associating', u'assurances', u'astonished', u'atoned', u'atoning', u'attaching', u'attachments', u'attacked', u'attacks', u'attained', u'attempted', u'attempting', u'attempts', u'attendants', u'attended', u'attending', u'attentions', u'attracted', u'attractions', u'attributed', u'attributing', u'auditors', u'augmenting', u'austen', u'authorised', u'authors', u'availed', u'avignon', u'avoided', u'avoiding', u'awaited', u'awakened', u'awaking', u'bags', u'balls', u'banished', u'barouches', u'bathed', u'bears', u'beasts', u'beauties', u'became', u'bedrooms', u'beds', u'befallen', u'befalls', u'befell', u'began', u'begged', u'begins', u'behaved', u'beings', u'believed', u'believes', u'belonged', u'belongs', u'benefited', u'bequeathed', u'berkeley', u'bestowed', u'betrayed', u'betraying', u'biased', u'blackest', u'blameable', u'blessings', u'blights', u'blossoms', u'blundered', u'blushed', u'blushes', u'bolder', u'bones', u'bonomi', u'books', u'booksellers', u'borrowed', u'bottoms', u'boys', u'breakfasting', u'bribing', u'brightened', u'brighter', u'bringing', u'brings', u'broader', u'brothers', u'bruised', u'buildings', u'bursts', u'buying', u'called', u'calls', u'calming', u'candles', u'candour', u'canvassing', u'cards', u'cares', u'caresses', u'careys', u'carriages', u'carries', u'cases', u'casts', u'cats', u'caused', u'ceased', u'ceasing', u'censured', u'centre', u'certainties', u'chagrined', u'chairs', u'chambers', u'chanced', u'changed', u'changes', u'changing', u'characters', u'charged', u'charmed', u'charms', u'cheated', u'checking', u'cheeks', u'cheerfuller', u'cherished', u'cherries', u'children', u'choked', u'chuse', u'chusing', u'circles', u'circumstances', u'civilities', u'claimed', u'claiming', u'claims', u'clarke', u'cleared', u'cleveland', u'clogged', u'closing', u'clouds', u'coats', u'collecting', u'coloured', u'colouring', u'combe', u'comforted', u'comforts', u'comings', u'commanded', u'commands', u'commended', u'comments', u'commissioned', u'commonest', u'communicated', u'companions', u'compared', u'compares', u'comparisons', u'complained', u'complaining', u'complaints', u'completed', u'compliments', u'comprehended', u'concealing', u'concerns', u'concessions', u'concluded', u'conclusions', u'conditions', u'conducted', u'confessed', u'confidante', u'conforming', u'congratulated', u'congratulating', u'congratulations', u'conjectured', u'conjectures', u'conjecturing', u'connections', u'conquests', u'consented', u'consequences', u'considerations', u'considers', u'consisted', u'consists', u'consoled', u'conspired', u'constantia', u'consulted', u'contained', u'containing', u'contend', u'contenting', u'continuing', u'contradicted', u'contrasted', u'contributed', u'contributing', u'contrived', u'contrives', u'contriving', u'controlled', u'conveniences', u'conversations', u'conversed', u'conversing', u'conveyed', u'conveying', u'copying', u'cordials', u'cottages', u'counsellor', u'counteracted', u'couples', u'courted', u'courting', u'courtland', u'cousins', u'cowper', u'cows', u'coxcombs', u'cramps', u'created', u'creating', u'creatures', u'cries', u'crimsoned', u'curtsying', u'cutlets', u'danced', u'dances', u'dared', u'darker', u'dartford', u'dashwood', u'dashwoods', u'daughters', u'davies', u'dawdled', u'dawlish', u'dawned', u'dearer', u'dearest', u'debated', u'debts', u'deceived', u'deciding', u'decisions', u'declares', u'declaring', u'declining', u'deemed', u'deeper', u'deepest', u'defects', u'defended', u'deficiencies', u'degrees', u'delaford', u'delayed', u'delays', u'deliberating', u'delicacies', u'delighful', u'delineated', u'delivered', u'demanded', u'demands', u'demonstrations', u'demur', u'denied', u'dennison', u'denoted', u'denoting', u'departing', u'depended', u'depends', u'deprived', u'described', u'describing', u'deserts', u'deserves', u'designs', u'desiring', u'despatch', u'despatching', u'despised', u'despising', u'destroyed', u'destroys', u'detaining', u'detected', u'detecting', u'determining', u'deterred', u'detested', u'devolved', u'died', u'dies', u'differed', u'differing', u'difficulties', u'dimensions', u'diminished', u'dined', u'dinners', u'directing', u'directions', u'disagreements', u'disappeared', u'disappointments', u'disapproved', u'disapproves', u'disapproving', u'discarded', u'discharged', u'disclaiming', u'disclosing', u'discontents', u'discovering', u'discussions', u'disgraced', u'disinherited', u'disliked', u'dismissed', u'dismounted', u'dispatched', u'dispatches', u'dispersing', u'disposing', u'disputes', u'disqualifications', u'disregarded', u'dissembling', u'dissented', u'distresses', u'distrusts', u'diverted', u'doatingly', u'donavan', u'doomed', u'dooming', u'doors', u'dorsetshire', u'doubted', u'doubts', u'douceur', u'downs', u'dr', u'drains', u'drawings', u'draws', u'dreaded', u'dreading', u'dreaming', u'dresses', u'drives', u'dropped', u'drops', u'drury', u'duets', u'duties', u'earlier', u'earliest', u'earned', u'ears', u'echoed', u'editions', u'edtions', u'effected', u'effecting', u'effusions', u'ellison', u'ellisons', u'eloping', u'eluded', u'embellishments', u'embraced', u'embraces', u'employments', u'enabled', u'enamoured', u'encouraged', u'encouragements', u'encroachments', u'encumbered', u'endeavoring', u'endeavors', u'endeavour', u'endeavoured', u'endeavouring', u'endeavours', u'endowed', u'ends', u'endured', u'enfeebled', u'enforcing', u'engagements', u'england', u'enjoyed', u'enjoyments', u'enquired', u'enquiries', u'enquiring', u'ensued', u'ensured', u'entered', u'entertained', u'entitled', u'entreated', u'entreaties', u'entrusted', u'equalled', u'equals', u'erred', u'errors', u'escaped', u'esq', u'establishing', u'esteemed', u'esteeming', u'esteems', u'estimating', u'estranged', u'evenings', u'events', u'evils', u'examined', u'exceeded', u'excellencies', u'exchanged', u'exclaimed', u'exclamations', u'excused', u'excuses', u'exercised', u'exercising', u'exerted', u'exertions', u'exeter', u'exhilarated', u'existed', u'expectations', u'expected', u'expecting', u'expects', u'expenses', u'experiencing', u'explained', u'explanations', u'expressing', u'expressions', u'extolling', u'extorted', u'extorting', u'extremest', u'eyeing', u'eyes', u'faces', u'facts', u'failed', u'falls', u'familiarized', u'families', u'fancying', u'fates', u'fatigued', u'fatigues', u'faults', u'favour', u'favourable', u'favourite', u'favourites', u'fearing', u'fears', u'features', u'feelings', u'feels', u'feet', u'felicitations', u'females', u'ferrars', u'fetches', u'fettered', u'finds', u'finest', u'fingers', u'flattered', u'flatteries', u'flowed', u'fluctuating', u'flushed', u'foibles', u'followed', u'follows', u'fond', u'footsteps', u'forebodings', u'foreplanned', u'foresaw', u'foreseeing', u'foreseen', u'forfeited', u'forfeiting', u'forgave', u'forgiven', u'forms', u'forsaking', u'fortunes', u'forwarded', u'foundations', u'founded', u'fowls', u'friendliest', u'friends', u'frightens', u'froid', u'frosts', u'fulfil', u'fulfilled', u'fullest', u'gained', u'gales', u'gardens', u'garrets', u'gates', u'gathered', u'generations', u'gentlemen', u'gigs', u'gilberts', u'girls', u'gives', u'glances', u'gloried', u'gloves', u'godby', u'goings', u'goodby', u'governed', u'gowns', u'graces', u'grandmothers', u'granted', u'greatest', u'grieves', u'grows', u'guardians', u'guessed', u'guests', u'guided', u'guineas', u'habits', u'hallooing', u'hands', u'handsomer', u'handsomest', u'hang', u'hanover', u'happened', u'happens', u'hardened', u'hardships', u'harley', u'has', u'hastened', u'hastening', u'hated', u'hates', u'hating', u'having', u'hazarded', u'hazarding', u'heads', u'heard', u'hears', u'heightened', u'heightening', u'heights', u'heirs', u'held', u'hens', u'henshawe', u'hesitated', u'hiding', u'hills', u'hinted', u'hints', u'hoarded', u'holborn', u'holburn', u'holds', u'holidays', u'homes', u'honeysuckles', u'honiton', u'honour', u'honourable', u'honourably', u'honoured', u'honours', u'hopes', u'hoping', u'horrors', u'horses', u'hours', u'houses', u'howsever', u'humbled', u'humiliations', u'humored', u'humoured', u'humouring', u'hunted', u'hunters', u'hunts', u'hurrying', u'husbands', u'huswifes', u'ideas', u'idled', u'idolized', u'ii', u'imaginations', u'imagined', u'imagining', u'imbibed', u'immoveable', u'imparted', u'imperfections', u'implied', u'implies', u'impoverished', u'impoverishing', u'improved', u'improvements', u'imputed', u'inclinations', u'inclined', u'inclosing', u'including', u'incommoded', u'inconveniences', u'increased', u'incurred', u'incurring', u'indulged', u'infants', u'inflicted', u'inflicting', u'influenced', u'inforce', u'inforced', u'informing', u'inhabitants', u'inhabiting', u'inheriting', u'injuries', u'inquired', u'inquiries', u'insinuations', u'insisted', u'installed', u'instigated', u'instructions', u'insulted', u'intends', u'intentions', u'intents', u'interests', u'interposed', u'interspersed', u'intervals', u'interviews', u'intimated', u'introduced', u'introducing', u'intruded', u'invented', u'inventing', u'invitations', u'invited', u'irritated', u'irritates', u'issued', u'jealousies', u'jenning', u'jennings', u'jewels', u'jilting', u'joined', u'joked', u'jokes', u'joking', u'joys', u'judged', u'judging', u'judgments', u'jumbled', u'justified', u'keeps', u'keys', u'kicked', u'kinder', u'kindest', u'kingham', u'kissed', u'kisses', u'knees', u'knives', u'knows', u'laboured', u'lamentations', u'lamps', u'lanes', u'languages', u'larger', u'largest', u'lasted', u'laughed', u'laughs', u'leagued', u'legacies', u'lengthened', u'lengths', u'lessened', u'lessening', u'letters', u'letting', u'lies', u'lifted', u'lightened', u'liked', u'likes', u'limbs', u'limits', u'lines', u'lingered', u'lingering', u'lips', u'listened', u'lives', u'livings', u'll', u'lodges', u'loitered', u'lombardy', u'london', u'longed', u'longest', u'longstaple', u'looked', u'looks', u'loved', u'lovers', u'loves', u'lowered', u'lurking', u'magna', u'maids', u'maintained', u'makes', u'mama', u'managed', u'marlborough', u'marriages', u'marries', u'matters', u'maxims', u'meadows', u'meals', u'means', u'meantime', u'measures', u'medicines', u'meditated', u'meditations', u'meetings', u'mentioned', u'mentioning', u'merest', u'merits', u'merrier', u'messages', u'middleton', u'middletons', u'militated', u'minds', u'minutes', u'misapplied', u'misinformed', u'missed', u'misses', u'mistakes', u'mixing', u'modestest', u'mohrs', u'moments', u'months', u'mosquitoes', u'mothers', u'motives', u'moved', u'murmurings', u'muttered', u'nabobs', u'named', u'names', u'natured', u'nearer', u'needed', u'neglected', u'neighbour', u'neighbourhood', u'neighbouring', u'neighbourly', u'neighbours', u'nerves', u'nests', u'nettles', u'newer', u'newspapers', u'nicest', u'nieces', u'nipped', u'nodded', u'nods', u'noisier', u'notes', u'noticed', u'noticing', u'notions', u'nt', u'nurses', u'obeyed', u'objected', u'objections', u'objects', u'obligations', u'observations', u'observed', u'obstacles', u'obstructed', u'obtained', u'obtaining', u'obviated', u'obviating', u'occasioned', u'occasions', u'occupations', u'occupied', u'occurred', u'oddest', u'offence', u'offences', u'offending', u'offered', u'offices', u'oftener', u'oftenest', u'oldest', u'olives', u'omitted', u'ones', u'opened', u'opinions', u'opportunities', u'ordained', u'orders', u'originated', u'ornamented', u'ornaments', u'others', u'outdone', u'outgrown', u'outlived', u'outraged', u'outstaid', u'outstretched', u'outstripped', u'outweighs', u'overcame', u'overcoming', u'overheard', u'overlooked', u'overpowered', u'overspreading', u'overstrained', u'owed', u'owned', u'owners', u'owning', u'paces', u'pacified', u'packages', u'packed', u'pages', u'paid', u'pains', u'palanquins', u'palmers', u'pangs', u'papers', u'parcels', u'parents', u'parlors', u'parlour', u'parrys', u'particulars', u'parties', u'parting', u'partners', u'parts', u'passages', u'passed', u'passions', u'patches', u'patronised', u'patterns', u'paused', u'pausing', u'pearls', u'perceived', u'perfections', u'performances', u'performed', u'performers', u'performing', u'permitting', u'persecutions', u'persevered', u'persisted', u'persons', u'persuading', u'pictures', u'pieces', u'pimples', u'piqued', u'pitched', u'pitied', u'placed', u'placing', u'plaguing', u'planning', u'plans', u'plantations', u'plants', u'played', u'playfellows', u'playing', u'playthings', u'pleasanter', u'pleasantest', u'pleased', u'pleasures', u'plums', u'pointers', u'points', u'ponds', u'poplars', u'popt', u'possesses', u'possessions', u'postponing', u'posts', u'pounds', u'poured', u'powers', u'practices', u'practise', u'practised', u'praised', u'praises', u'prayers', u'pre', u'preceded', u'preferring', u'prejudices', u'premeditated', u'premises', u'prenticed', u'preparations', u'preparing', u'prescribed', u'prescriptions', u'presented', u'presenting', u'presents', u'preserved', u'presided', u'pressed', u'presumed', u'pretence', u'pretends', u'pretensions', u'prettier', u'prettiest', u'prevailed', u'prevailing', u'prevented', u'preyed', u'principles', u'probabilities', u'proceeded', u'proclaimed', u'procured', u'procuring', u'producing', u'professions', u'profited', u'prohibited', u'projects', u'promised', u'promises', u'promontories', u'promoted', u'promoting', u'prompted', u'pronouncing', u'proofs', u'propensities', u'prophecies', u'proposals', u'proposed', u'prospects', u'protestations', u'protested', u'proud', u'provisions', u'provoked', u'publishing', u'pulled', u'puppies', u'purchases', u'purposes', u'pursued', u'pursuing', u'pursuits', u'puts', u'putting', u'qualifications', u'quarrelled', u'quarrelling', u'questions', u'quickened', u'quicker', u'quickest', u'quieted', u'quitting', u'rained', u'raises', u'rambles', u'raptures', u'reached', u'reaped', u'reasonings', u'reasons', u'recalled', u'receiving', u'reckoned', u'reckons', u'reclining', u'recognised', u'recollecting', u'recommended', u'recommending', u'reconciled', u'recovered', u'recovering', u'recreating', u'recurred', u'referred', u'referring', u'refinements', u'reflections', u'refreshed', u'refreshments', u'refused', u'regarded', u'regards', u'regrets', u'regretted', u'regretting', u'rejected', u'rejoiced', u'relating', u'relations', u'relatives', u'released', u'relics', u'relied', u'relinquished', u'relying', u'remained', u'remaining', u'remarks', u'remedies', u'remembered', u'remembering', u'remembers', u'remembrances', u'reminded', u'reminding', u'reminds', u'removes', u'rendered', u'renewed', u'renewing', u'renounced', u'repaid', u'repaired', u'repeating', u'repining', u'replied', u'replying', u'reports', u'representations', u'represented', u'representing', u'reproached', u'reproaches', u'reproaching', u'reproved', u'reproving', u'repulsed', u'requested', u'requesting', u'required', u'requires', u'requiring', u'rescued', u'reseated', u'resembled', u'resembling', u'resented', u'resettled', u'resided', u'residing', u'resisted', u'resists', u'resolving', u'resorted', u'resources', u'respected', u'respects', u'rested', u'restored', u'restoring', u'restraints', u'resumed', u'retailed', u'retained', u'retreated', u'retrenched', u'returning', u'reverted', u'revived', u'rewarded', u'rheumatisms', u'ribbons', u'richardson', u'richardsons', u'richer', u'rings', u'rises', u'risking', u'rivals', u'roads', u'roared', u'robbed', u'rocks', u'rooms', u'roused', u'ruins', u'rumour', u'sackville', u'sacrificed', u'sakes', u'salts', u'sandersons', u'sashes', u'sauntered', u'saves', u'savings', u'says', u'scenes', u'schemes', u'scolded', u'scorning', u'scotland', u'scrawls', u'screamed', u'screams', u'screens', u'scrupled', u'scruples', u'scrupling', u'scrutinies', u'searched', u'seasons', u'seats', u'seconded', u'seconds', u'secrets', u'secured', u'secures', u'securing', u'seduced', u'seemed', u'seems', u'seized', u'sellers', u'sends', u'sensations', u'senses', u'sentences', u'sentiments', u'separated', u'separations', u'servants', u'served', u'services', u'shades', u'shakespeare', u'shared', u'sharing', u'sharpe', u'shew', u'shewed', u'shewing', u'shewn', u'shews', u'shillings', u'shocked', u'shoes', u'shops', u'shoulders', u'showed', u'showers', u'shrubberies', u'shrugging', u'shuddering', u'shutters', u'sighed', u'signs', u'silencing', u'silks', u'simpered', u'simpering', u'simplest', u'simpson', u'sisters', u'situations', u'slightest', u'smallest', u'smiled', u'smiles', u'smirked', u'smokes', u'sobbed', u'sobered', u'sobs', u'softened', u'solicitations', u'somersetshire', u'songs', u'soothings', u'sorrows', u'souls', u'sounds', u'sources', u'spared', u'speaks', u'spends', u'spirits', u'sportsmen', u'sprained', u'spraining', u'spunging', u'spurned', u'stairs', u'stammered', u'stanhill', u'stared', u'stares', u'started', u'startled', u'stating', u'staying', u'steele', u'steeles', u'steepest', u'steps', u'stimulated', u'stirred', u'stockings', u'stopt', u'strains', u'strangers', u'streamed', u'strengthened', u'stretched', u'strictest', u'strikes', u'stronger', u'strongest', u'struggled', u'studies', u'stupified', u'styled', u'subjects', u'submitted', u'submitting', u'subsisted', u'subsisting', u'succeeded', u'succour', u'suffered', u'sufferings', u'suffers', u'suggested', u'suited', u'summits', u'summoned', u'superannuated', u'supplanted', u'supplied', u'supplying', u'supported', u'supports', u'surfaces', u'surpassed', u'surprised', u'survived', u'suspecting', u'suspects', u'suspicions', u'swallowed', u'sweetest', u'sweetmeats', u'syllables', u'sympathised', u'symptoms', u'systems', u'takes', u'talents', u'talked', u'talks', u'tallest', u'tastes', u'taverns', u'tears', u'teazed', u'teazing', u'tells', u'tempers', u'tempted', u'tended', u'tenderest', u'terminated', u'terms', u'thanked', u'things', u'thinks', u'thirds', u'thistles', u'thomson', u'thorns', u'thoughts', u'threatened', u'threats', u'thunderbolts', u'tis', u'tithes', u'traced', u'traded', u'trades', u'traits', u'transacted', u'transgressed', u'travellers', u'travelling', u'treasured', u'treated', u'trees', u'trembled', u'tremour', u'trials', u'tricked', u'tricks', u'tries', u'trifled', u'troubles', u'truest', u'trusted', u'truths', u'twould', u'undergone', u'undervalued', u'unfavourable', u'unites', u'unlover', u'unpleasantest', u'urged', u'ushered', u'using', u'valleys', u'variations', u'varying', u've', u'ventured', u'venturing', u'viewed', u'viewing', u'views', u'vigour', u'villages', u'violins', u'virtues', u'visited', u'visitors', u'visits', u'voices', u'vouchsafed', u'waistcoats', u'waited', u'walked', u'walks', u'walls', u'wandered', u'wanted', u'wants', u'warmest', u'weakened', u'weaknesses', u'weddings', u'weeks', u'welcomed', u'westminster', u'westons', u'wettest', u'weymouth', u'whiled', u'whims', u'whitakers', u'whiter', u'whitwell', u'wildest', u'williams', u'willoughby', u'willoughbys', u'windows', u'winks', u'wiping', u'wisest', u'wishes', u'withdrew', u'witnessed', u'witnesses', u'witnessing', u'witticisms', u'wittiest', u'wives', u'women', u'wondered', u'woods', u'words', u'workmen', u'worlds', u'wrapt', u'writes', u'yards', u'years', u'yielded', u'youngest']\n"
     ]
    }
   ],
   "source": [
    "### 词汇列表语料库\n",
    "# 例2-3. 过滤文 :此程序计算文 的词汇表，\n",
    "# 然后删除所有在现有的词汇列表中出现的元 素，只留下罕见或拼写错误的词。\n",
    "\n",
    "def unusual_words(text):\n",
    "    text_vocab = set(w.lower() for w in text if w.isalpha())\n",
    "    english_vocab = set(w.lower() for w in nltk.corpus.words.words())\n",
    "    unusual = text_vocab.difference(english_vocab)\n",
    "    return sorted(unusual)\n",
    "print(unusual_words(nltk.corpus.gutenberg.words('austen-sense.txt')))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 停用词\n",
    "from nltk.corpus import stopwords\n",
    "stopwords.words('english')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAG4dJREFUeJzt3Xl0nXd95/H3R4tt2bKkxI6JYstWmISEJNjRErZmCgVO\nC8xSykCZTIftAB4GugCdnp4z7cC0Zc4sbRkKPZD6sKSZQjoDhIIzbJ3DQEpJGCzbcZyYJG6CbXmN\nF0m2JS+SvvPHvboWipZrWc997k/38zpHJ7r3Pvc+HyeOPvo9v+f3PIoIzMzMAOryDmBmZtXDpWBm\nZiUuBTMzK3EpmJlZiUvBzMxKXApmZlbiUjAzsxKXgpmZlbgUzMyspCHvAJdr9erV0dnZOa/3joyM\n0NTUtLCBMpRS3pSyQlp5U8oKaeVNKStcWd6+vr7jEXHNXNslVwqdnZ1s27ZtXu/t6+ujp6dngRNl\nJ6W8KWWFtPKmlBXSyptSVriyvJL2lbOdDx+ZmVmJS8HMzEpcCmZmVuJSMDOzEpeCmZmVZFYKkjok\n/V9JeyQ9Jum3ptlGkj4haa+kXZK6s8pjZmZzy3KkMAr8dkS8EHgp8H5Jt0zZ5nXAjcWvzcCnswoz\nNh7sG7zI4PDFrHZhZpa8zEohIg5HxPbi96eBPcDaKZv9MnBvFDwMtElqzyLPb963gw995wTffeJo\nFh9vZrYoVGTxmqROoAv40ZSX1gIHJj3uLz53eMr7N1MYSdDe3k5fX99lZ2jjDADf3vYU68fTKIbh\n4eF5/VnzkFJWSCtvSlkhrbwpZYXK5M28FCQ1A18BPhARQ1NfnuYt8ZwnIrYAWwB6e3tjPiv6LrSd\n4AuPPkz/uYZkVjCmtNoypayQVt6UskJaeVPKCpXJm+nZR5IaKRTCFyLi/mk26Qc6Jj1eBxzKIsvG\nda3UAXsOn2bkwlgWuzAzS16WZx8J+CywJyI+NsNmXwfeVjwL6aXAYEQcnmHbK7JiaQMb2hoYGw8e\nPTiYxS7MzJKX5eGjnwPeCjwqaWfxuX8PrAeIiLuBbwCvB/YCw8A7M8zDjVc38szAKNv3n+LF11+d\n5a7MzJKUWSlExA+Yfs5g8jYBvD+rDFO9YNUSvvP0CDv2n6rULs3MklJTK5pvWtUIwPb9AxT6yMzM\nJqupUmhvrqe1qZFnT5/n4MBI3nHMzKpOTZWCJLrWtwGwY/9AzmnMzKpPTZUCQPf6qwCXgpnZdGqu\nFEojhQOebDYzm6rmSmFTRxsSPHZwiPOjXsRmZjZZzZVCy7JGblzTzIWxcR47NPWqG2Zmta3mSgGg\nq8PzCmZm06nNUijOK2z3IjYzs59Rk6XQvaEwUtjpkYKZ2c+oyVK44ZpmVi5t4ODACEeHzuUdx8ys\natRkKdTViU0dE4vYfAjJzGxCTZYC4JXNZmbTqNlS8MpmM7PnqtlSuL14+GjXwQEujo3nnMbMrDrU\nbClctWIJ169ewbmL4/zk8Om845iZVYWaLQXwdZDMzKaq8VIozCts3+dSMDODWi+FidNSD3iy2cwM\narwUbr52JU2N9ew7McyJM+fzjmNmlruaLoWG+jo2rmsFYKdHC2ZmtV0KMGlewSubzcxcCt1e2Wxm\nVlLzpXB7sRQeOTDA2HjknMbMLF81XwprVi5j3VVNnL0wxpNHvYjNzGpbzZcCXJpX8CEkM6t1LgUm\nzyt4stnMaptLAZ+BZGY2waUA3NLewpKGOv7h2bMMDl/MO46ZWW5cCsCShjpetLa4iK3f8wpmVrsy\nKwVJn5N0TNLuGV5vlbRV0iOSHpP0zqyylGPiOki+OJ6Z1bIsRwr3AK+d5fX3A49HxCbglcCfSlqS\nYZ5Zlc5A8uUuzKyGZVYKEfEgcHK2TYCVkgQ0F7cdzSrPXLo3FEYKO/efYtyL2MysRikiux+AkjqB\nByLitmleWwl8HbgZWAm8JSL+9wyfsxnYDNDe3t6zdevWeeUZHh5m+fLlM77+ngeOcXJknD/7pdWs\na2mY1z4W0lx5q0lKWSGtvCllhbTyppQVrixvb29vX0T0zrVdnj/5fgnYCbwK+EfA30r6u4gYmrph\nRGwBtgD09vZGT0/PvHbY19fHbO99yZ4+vrn7COdXXkdPT8e89rGQ5spbTVLKCmnlTSkrpJU3paxQ\nmbx5nn30TuD+KNgLPENh1JCbbq9sNrMal2cp7AdeDSDpecBNwNM55rl0z2YvYjOzGpXZ4SNJ91E4\nq2i1pH7gI0AjQETcDfwRcI+kRwEBvxsRx7PKU47b1rbSUCeePHqaM+dHaV6a/7yCmVklZfZTLyLu\nmuP1Q8AvZrX/+VjWWM8t17Wwq3+QXQcGePkNq/OOZGZWUV7RPEW31yuYWQ1zKUwxMa/glc1mVotc\nClN0dVwaKWS5hsPMrBq5FKbouLqJ1c1LOHn2AvtPDucdx8ysolwKU0ji9g7fX8HMapNLYRqX1it4\nstnMaotLYRpe2WxmtcqlMI2N61qpE+w5PMTIhbG845iZVYxLYRorljZw07UtjI4Hjx4czDuOmVnF\nuBRm0O3rIJlZDXIpzKDL8wpmVoNcCjMorWzef8qL2MysZrgUZvD81StobWrk2OnzHBo8l3ccM7OK\ncCnMQJLvr2BmNcelMIuJ6yBt3+d5BTOrDS6FWZRGCgc8UjCz2uBSmMXt69uQ4LGDQ5wf9SI2M1v8\nXAqzaFnWyA3XNHNhbJzHDg3lHcfMLHMuhTn44nhmVktcCnO4dHE8zyuY2eLnUpiDVzabWS1xKczh\nhjXNNC9t4ODACEeHvIjNzBY3l8Ic6uvE7R2eVzCz2uBSKIPXK5hZrXAplKFUCl7ZbGaLnEuhDBOX\nu9h1cICLY+M5pzEzy45LoQxXrVjC9atXcO7iOE8cOZ13HDOzzLgUytTVcen+CmZmi5VLoUxe2Wxm\ntcClUKYur2w2sxrgUijTzdeuZFljHT89McyJM+fzjmNmlonMSkHS5yQdk7R7lm1eKWmnpMckfT+r\nLAuhob6OjesKh5B2HvAhJDNbnLIcKdwDvHamFyW1AZ8C/nlE3Aq8OcMsC6Lb10Eys0Uus1KIiAeB\nk7Ns8q+A+yNif3H7Y1llWSgTk80+A8nMFitFRHYfLnUCD0TEbdO89nGgEbgVWAn8WUTcO8PnbAY2\nA7S3t/ds3bp1XnmGh4dZvnz5vN4LcOrcGO/e+izLGsS9b1hDvTTvzyrHleatpJSyQlp5U8oKaeVN\nKStcWd7e3t6+iOida7uGeX36wmgAeoBXA03AQ5Iejognp24YEVuALQC9vb3R09Mzrx329fUx3/dO\nWPeD79J/aoSWdS/g5mtbruiz5rIQeSslpayQVt6UskJaeVPKCpXJm+fZR/3AtyLibEQcBx4ENuWY\npywTp6Zu93WQzGwRyrMUvgb8Y0kNkpYDLwH25JinLF2ly2h7XsHMFp/MDh9Jug94JbBaUj/wEQpz\nCETE3RGxR9K3gF3AOPCZiJjx9NVq0b2heAaST0s1s0XosktB0lVAR0Tsmm27iLhrrs+KiD8G/vhy\nM+TplvYWljTUsffYGQZHLtLa1Jh3JDOzBVPW4SNJ35PUIulq4BHg85I+lm206rSkoY7britMMHsR\nm5ktNuXOKbRGxBDwRuDzEdEDvCa7WNXN10Eys8Wq3FJokNQO/CrwQIZ5kuCVzWa2WJVbCn8AfBvY\nGxE/lvR84KnsYlW3S5fRPsX4eHaL/8zMKq3ciebDEbFx4kFEPF2rcwoA7a3LeF7LUo4Onefp42e5\nYU1z3pHMzBZEuSOFT5b5XE2QNOkQkucVzGzxmHWkIOllwMuBayR9aNJLLUB9lsGqXdf6Nr65+wjb\n9w/w5t6OvOOYmS2IuQ4fLQGai9utnPT8EPCmrEKlwGcgmdliNGspRMT3ge9Luici9lUoUxJetLaV\nhjrx5NHTnDk/SvPSPK8taGa2MMqdU1gqaYuk70j67sRXpsmq3LLGem65roXxgF1exGZmi0S5v95+\nCbgb+Awwll2ctHR1tLGrf5AdBwZ4+Q2r845jZnbFyi2F0Yj4dKZJEtS94Sr+8qF9nlcws0Wj3MNH\nWyW9T1K7pKsnvjJNloCujksrm7O8g52ZWaWUO1J4e/GfvzPpuQCev7Bx0tJxdROrVizhxNkL7D85\nzIZVK/KOZGZ2RcoaKUTE9dN81XQhQGERW5evg2Rmi0hZIwVJb5vu+Yi4d2HjpKdrfRv/Z89Rduw/\nxRu61uYdx8zsipR7+OiOSd8vA14NbAdcCsWL4233SMHMFoGySiEifmPyY0mtwP/IJFFiNq1ro06w\n5/AQIxfGaFpS01f/MLPElXv20VTDwI0LGSRVK5Y2cNO1LYyOB7sPDeYdx8zsipQ7p7CVwtlGULgQ\n3guB/5VVqNR0rW9jz+Ehtu87xR2dNX+mrpklrNw5hT+Z9P0osC8i+jPIk6Sujja++KP9PgPJzJJX\n7imp3wd+QuFKqVcBF7IMlZruDYXTUrfvP+VFbGaWtLJKQdKvAv8PeDOF+zT/SFJNXzp7sutXraC1\nqZFjp89zaPBc3nHMzOat3Inm3wPuiIi3R8TbgBcD/yG7WGmpqxO3d1y6b7OZWarKLYW6iDg26fGJ\ny3hvTej2ymYzWwTKnWj+lqRvA/cVH78F+EY2kdJ0aRGbRwpmlq657tF8A/C8iPgdSW8E7gQEPAR8\noQL5krGpePjosYNDnB8dY2mDF7GZWXrmOgT0ceA0QETcHxEfiogPUhglfDzrcClpbWrkxjXNXBgb\n5/FDQ3nHMTObl7lKoTMidk19MiK2AZ2ZJErYxCEkzyuYWarmKoVls7zWtJBBFoOJy2h7XsHMUjVX\nKfxY0numPinpXUDfbG+U9DlJxyTtnmO7OySNLYZ1Dx4pmFnq5jr76APAVyX9GpdKoBdYAvzKHO+9\nB/hzZrm8tqR64L8C3y4nbLW7cc1Kmpc2cHBghGND51jTMttAy8ys+sw6UoiIoxHxcuAPgJ8Wv/4g\nIl4WEUfmeO+DwMk59v8bwFeAY3Nsl4T6OrGpoxXw/RXMLE3K8lo9kjqBByLitmleWwt8EXgV8Nni\ndl+e4XM2A5sB2tvbe7Zu3TqvPMPDwyxfvnxe7y3XF3ef5it7zvKGm1bw1o0rr+izKpF3oaSUFdLK\nm1JWSCtvSlnhyvL29vb2RUTvXNuVu3gtCx8HfjcixiTNumFEbAG2APT29kZPT8+8dtjX18d831uu\nwRVH+cqebRy6sPSK91WJvAslpayQVt6UskJaeVPKCpXJm2cp9AJ/XSyE1cDrJY1GxN/kmOmK3d5R\nOANpV/8AF8fGaaz31UDMLB25/cSKiOsjojMiOoEvA+9LvRAArl6xhM5Vyzl3cZwnjpzOO46Z2WXJ\nrBQk3Ufhchg3SeqX9C5J75X03qz2WS0uXRzP6xXMLC2ZHT6KiLsuY9t3ZJUjD13r27h/x0G27x/g\nrS/LO42ZWfl8wDsDXR4pmFmiXAoZuPnalSxrrOOnJ4Y5edZ3LjWzdLgUMtBQX8fGdb4Tm5mlx6WQ\nEV8HycxS5FLISFdxvcKOAx4pmFk6XAoZ6S6OFB45MMjYeHaXEjEzW0guhYysaVnG2rYmzpwf5alj\nXsRmZmlwKWTI8wpmlhqXQoa8stnMUuNSyNDESMH3VjCzVLgUMnTLdS0sqa9j77EzDI5czDuOmdmc\nXAoZWtpQz21rWwB45IBHC2ZW/VwKGZu4DtJ2zyuYWQJcChnzGUhmlhKXQsYmzkDaeWCAcS9iM7Mq\n51LIWHvrMp7XspTBkYs8ffxs3nHMzGblUsiYpEvXQfK8gplVOZdCBXRvKM4r+AwkM6tyLoUKuHQn\nNpeCmVU3l0IF3HZdKw114okjQ5w5P5p3HDOzGbkUKqBpST0vbG9hPGBXv0cLZla9XAoV0u31CmaW\nAJdChXT5iqlmlgCXQoVMXtkc4UVsZladXAoVsv7q5axasYQTZy9w4ORI3nHMzKblUqgQSZPur+BD\nSGZWnVwKFeR5BTOrdi6FCirNK3hls5lVKZdCBW1c10ad4PFDQ5y7OJZ3HDOz53ApVFDz0gZe8LyV\njI4Hjx4czDuOmdlzuBQqrHuD5xXMrHplVgqSPifpmKTdM7z+a5J2Fb9+KGlTVlmqSVdH8QykfZ5X\nMLPqk+VI4R7gtbO8/gzwiojYCPwRsCXDLFVj8j2bvYjNzKpNZqUQEQ8CJ2d5/YcRMXEM5WFgXVZZ\nqsnzV6+gZVkDx06f5/DgubzjmJn9DGX526qkTuCBiLhtju3+HXBzRLx7htc3A5sB2tvbe7Zu3Tqv\nPMPDwyxfvnxe711IH/27k+w4coHffmkbL+9YNuN21ZK3HCllhbTyppQV0sqbUla4sry9vb19EdE7\n13YN8/r0BSTpF4B3AXfOtE1EbKF4eKm3tzd6enrmta++vj7m+96F9IpTT7LjyFMMNFxFT88tM25X\nLXnLkVJWSCtvSlkhrbwpZYXK5M21FCRtBD4DvC4iTuSZpZK8stnMqlVup6RKWg/cD7w1Ip7MK0ce\nbi+egbT70BDnR72IzcyqR5anpN4HPATcJKlf0rskvVfSe4ubfBhYBXxK0k5J27LKUm1amxq5YU0z\nF0bHefzQUN5xzMxKMjt8FBF3zfH6u4FpJ5ZrQVdHG3uPnWHH/oHS4SQzs7x5RXNOSiubfXE8M6si\nLoWclO6tsM+TzWZWPVwKOblxzUqalzZwcGCEY0NexGZm1cGlkJP6OrGpoxXwISQzqx4uhRx1dVy6\nDpKZWTVwKeSodCe2/R4pmFl1cCnkaGIR267+AUbHxnNOY2bmUsjVqualdK5azrmL4/zkyOm845iZ\nuRTy5usgmVk1cSnkzPMKZlZNXAo5617vlc1mVj1cCjm76dqVLGus45njZzl59kLeccysxrkUctZY\nX8fGtYVDSDsPeF7BzPLlUqgCXRs8r2Bm1cGlUAW8stnMqoVLoQp0F89AeuTAIGPjkXMaM6tlLoUq\nsKZlGWvbmjhzfpS9x87kHcfMaphLoUqU7q/gQ0hmliOXQpXwymYzqwYuhSrhlc1mVg1cClXi1uta\nWFJfx1PHzjA4cjHvOGZWo1wKVWJpQz23rm0B4BFf8sLMcuJSqCIT6xV8CMnM8uJSqCLdEyubfbkL\nM8uJS6GKXDoDaYBxL2Izsxy4FKrIda3LWLNyKYMjF3nmxNm845hZDXIpVBFJl+6v4HkFM8uBS6HK\neGWzmeXJpVBlujxSMLMcuRSqzIvWttJQJ544MsTI6HjeccysxmRWCpI+J+mYpN0zvC5Jn5C0V9Iu\nSd1ZZUlJ05J6XtjewnjA3pNe2WxmlZXlSOEe4LWzvP464Mbi12bg0xlmScrEvMJTJ1wKZlZZDVl9\ncEQ8KKlzlk1+Gbg3IgJ4WFKbpPaIOJxVplR0r7+Kex/axw/7z/EX3/+HvOOUpf/gWbadSSMrpJU3\npayQVt6UsgKcPTFCT0+2+8isFMqwFjgw6XF/8bmaL4WeDYXJ5mcGRvnP3/xJzmkuw66EskJaeVPK\nCmnlTSjrjVc38qGM95FnKWia56ZdxitpM4VDTLS3t9PX1zevHQ4PD8/7vZX2my9uZe/xERoa8vxP\nVL7R0dFkskJaeVPKCmnlTSkrQGvDeOY/w/L8t9EPdEx6vA44NN2GEbEF2ALQ29sbPfMcP/X19THf\n91ZaT09aeVPKCmnlTSkrpJU3paxQmbx5npL6deBtxbOQXgoMej7BzCxfmY0UJN0HvBJYLakf+AjQ\nCBARdwPfAF4P7AWGgXdmlcXMzMqT5dlHd83xegDvz2r/ZmZ2+byi2czMSlwKZmZW4lIwM7MSl4KZ\nmZWoMN+bDknPAvvm+fbVwPEFjJO1lPKmlBXSyptSVkgrb0pZ4cryboiIa+baKLlSuBKStkVEb945\nypVS3pSyQlp5U8oKaeVNKStUJq8PH5mZWYlLwczMSmqtFLbkHeAypZQ3payQVt6UskJaeVPKChXI\nW1NzCmZmNrtaGymYmdksXApmZlbiUjAzq1LFWwt0zL3lwln0pSDpg5LW5Z2jHMW/AP9a0oeLj9dL\nenHeuczmImmTpF8vfm3KO89iUbya9N9Ucp/p3Idu/lqAb0s6Cfw18OWIOJpzppl8ChgHXgX8IXAa\n+ApwR56hZiJputvFDgJ9EbGz0nksH5J+C3gPcH/xqb+StCUiPpljrBlJWgr8C6CTST8DI+IP88o0\nh4cl3RERP67Ezmrm7CNJG4G3UPjL0B8Rr8k50nNI2h4R3ZJ2RERX8blHIqIqf/OS9EWgF9hafOqf\nAD8Gbga+FBH/La9sk0n6QUTcKek0P3sfcFH4Zawlp2jPMU3G0ktUWdYJknYBL4uIs8XHK4CHImJj\nvsmmJ+lbFH95AcYmno+IP80t1CwkPQ68gMLlfc5y6e9CJv9+a2GkMOEYcAQ4AazJOctMLkqqp/hD\nQdI1FEYO1WoV0B0RZwAkfQT4MvDzFP6Hq4pSiIg7i/9cmXeWuaSQcRpi0g/X4vfKKUs51kXEa/MO\ncRleV8mdLfpSkPRvKYwQrqHwA+s9EfF4vqlm9Angq8AaSf8JeBPw+/lGmtV64MKkxxcpXHRrRNL5\nnDJZ5X0e+JGkrxYfvwH4bI555vJDSS+KiEfzDlKOiJjvBUDnZdGXArAB+EAKx7gj4guS+oBXU/hN\n6w0RsSfnWLP5IoXjnV8rPv5nwH3FwwfVWry2wCLiY5K+B9xJ4e/tOyNiR76pZnUn8A5JzwDnyfhw\nTGpqZk7BsiGph0s/DH4QEdtyjmQ2K0kbpnu+0r+RVyuXgpmZlSz6dQpmZlY+l4KZmZW4FKymSfo9\nSY9J2iVpp6SXZLiv70lK5i5fVptq4ewjs2lJehnwTymstTgvaTWwJOdYZrnySMFqWTtwPCLOA0TE\n8Yg4JOnDkn4sabekLZIEpd/0/7ukByXtkXSHpPslPSXpo8VtOiX9RNJfFkcfX5a0fOqOJf2ipIck\nbZf0JUnNxef/i6THi+/9kwr+uzADXApW274DdEh6UtKnJL2i+PyfR8QdEXEb0ERhNDHhQkT8PHA3\n8DXg/cBtFM57X1Xc5iZgS/G89yHgfZN3WhyR/D7wmojoBrYBH5J0NfArwK3F9340gz+z2axcClaz\nipfn6AE2A88C/1PSO4BfkPQjSY9SuDjhrZPe9vXiPx8FHouIw8WRxtPAxCWOD0TE3xe//ysK6zgm\neylwC/D3knYCb6ewyHIIOAd8RtIbgeEF+8OalclzClbTImIM+B7wvWIJ/BtgI9AbEQck/Udg2aS3\nTFy+Y3zS9xOPJ/5/mrr4Z+pjAX8bEXdNzVO8VPqrgX8J/DqFUjKrGI8UrGZJuknSjZOeuh14ovj9\n8eJx/jfN46PXFyexAe4CfjDl9YeBn5N0QzHHckkvKO6vNSK+AXygmMesojxSsFrWDHxSUhswCuyl\ncChpgMLhoZ9SuBT45doDvF3SXwBPAZ+e/GJEPFs8THVf8dr+UJhjOA18TdIyCqOJD85j32ZXxJe5\nMFtAkjqBB4qT1GbJ8eEjMzMr8UjBzMxKPFIwM7MSl4KZmZW4FMzMrMSlYGZmJS4FMzMr+f8lskLV\nTExv6wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x108faf350>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import nltk\n",
    "puzzle_letters = nltk.FreqDist('egivrvonl')\n",
    "puzzle_letters.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY8AAAEJCAYAAABsc6siAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXd4HNX1sN+76pIlq7ggY2y59wKSCzYGg+mxTU8ghAAf\nCQmhBJIQBxLAQCCEH5DQQjU1dEyxTbUBQ8BVwr13W26y1btWu/f7Y2ZWK3mlLdom+7zPs8/O3rkz\nc2Ytz9lTr9JaIwiCIAj+YIu0AIIgCELHQ5SHIAiC4DeiPARBEAS/EeUhCIIg+I0oD0EQBMFvRHkI\ngiAIfiPKQxAEQfAbUR6CIAiC34jyEARBEPwmNtIChIouXbronJycgI6tra0lKSnpmJkfjTLJPUd+\nfjTKFG3zo1GmQO7BoqCg4LDWuqtPk7XWR+UrNzdXB0p+fv4xNT8c14i2+eG4RkefH45rdPT54bhG\nOO7BAsjXPj5jxW0lCIIg+I0oD0EQBMFvRHkIgiAIfnPUBswFQej42O12CgsLqaurAyA2NpYNGzb4\nfHyo50ejTL7MT0xMpGfPnsTFxfl83iOuE/CRgiAIIaawsJDU1FRycnJQSlFdXU1KSorPx4d6fjiu\nEez5WmuKi4spLCykT58+Pp+3JeK2EgQhaqmrqyMrKwulVKRFOWpQSpGVleWy5gJFlEc7WbevnMKK\nxkiLIQhHLaI4gk8wvtOQKQ+l1EtKqSKl1Fq3sXeUUivN106l1EpzPEcpVeu271m3Y3KVUmuUUluV\nUk+oKPpLamh08rPnljDz25JIiyIIghBWQml5vAKc6z6gtf6Z1nq01no0MBv4wG33Nmuf1vq3buPP\nANcDA8xXs3NGkvJaO1X1jZTWOWlodEZaHEEQQsATTzzBkCFDuPLKK0Ny/pkzZ/LII4/4PH/lypV8\n+umnXud99NFHrF+/vj2itUnIlIfW+jvA409y03r4KfBWW+dQSmUDaVrrxWb142vAhcGWNVAq6uyu\n7Uq3bUEQjh7+85//8Omnn/LGG29EWhQgepSHMp7JITq5UjnAPK318BbjpwKPaa3z3OatAzYDFcDf\ntNb/U0rlAQ9prc80500CZmitp7ZyvesxrBSys7Nz586dG5DcNTU1JCcne523ubiBO7429OOT53ah\nR6pvyWu+nj9c86NRJrnnyM+PBpliY2Pp378/AMPuX+jzef1h3V2TXdtOpxObrek39S233MLrr7/O\ngAEDuPTSS9mxYwfr1q2jsbGRO++8k6lTp/L666/zySef4HA4WL9+PTfffDN2u5233nqLhIQEZs+e\nTWZmJi+//DIvv/wyDQ0N9O3blxdffJHk5GQeeOABOnXqxO9//3u2b9/OH/7wBw4fPkxSUhJPPfUU\nAwYMcMnU0NDAyJEjqa2tpUePHvzxj39kyZIlZGVlcccdd7BgwQIefvhh7rvvPi677DLS0tJIS0vj\njTfeoG/fvs3ue+vWrTQ2No/X5uXlFVjPZW9EKlX3CppbHfuBXlrrYqVULvCRUmoY4Cm+0aq201o/\nDzwPkJeXp3NzcwMSrqCgAF+Ordp8CFgGQK9+gxh1QnpQzx+u+dEok9xz5OdHg0wbNmzwO3XWX9zP\n3zLNddasWXz11Vd8++23PPbYY5x99tm89tprlJWVMXbsWKZOnYpSig0bNrBixQrq6uro378///zn\nP1m1ahW33XYbs2fP5tZbb+WKK67gpptuorq6mn/84x+8/fbb3HzzzcTHxxMfH09KSgq33norzz77\nLAMGDGDp0qX86U9/Yu7cuS6ZUlJSuP/++8nPz+epp54C4LLLLmPMmDFMmTKFP//5z7z//vuMGDGC\n6dOnM3XqVC699FKP9x0fH8+oUaMC/t7CrjyUUrHAxYDrL0ZrXQ/Um9sFSqltwECgEOjpdnhPYF/4\npG2bitomV1WFuK0EIaTsfOgnYanbaI0vv/ySOXPmuOITdXV17N69G4DTTz+d1NRUUlNT6dy5M9Om\nTQNgxIgRrF69GoC1a9fyt7/9jZKSEmpqajjnnHOanb+qqopFixZx2WWXucbq6+u9ypWcnMwLL7zA\nqaeeyr/+9a8jLIxQEQnL40xgo9a60BpQSnUFSrTWDqVUX4zA+HatdYlSqlIpNR5YCvwSeDICMnuk\nsq7J5KuolXRdQTia0Voze/ZsBg0a1Gz8u+++IyEhwfXZZrO5PttsNpdr6JprruGjjz6if//+vPfe\neyxcuLDZeZxOJ+np6axcubLZeHV1tVfZ1qxZQ1ZWFvv2he+3dShTdd8CFgODlFKFSqnrzF2Xc2Sg\n/FRgtVJqFfA+8FuttRVsvwF4EdgKbAM+C5XM/uJubYjlIQhHN+eccw5PPvkkVpx4xYoVfh1fWVlJ\ndnY2drvdY/A9LS2NPn368N577wGGslq1atUR81JTU6msrHR93rVrF48++igrVqzgs88+Y/ny5R7n\nBZtQZltdobXO1lrHaa17aq1nmePXaK2fbTF3ttZ6mNZ6lNb6JK31XLd9+Vrr4Vrrflrrm3QoI/x+\n4u62kmwrQTi6ueuuu7Db7YwcOZLhw4dz1113+XX8/fffz7hx45g2bRqDBw/2OOeNN95g1qxZjBo1\nimHDhvHxxx8DMGfOHO6++27AcJGtX7+e0aNH884773DdddfxyCOP0KNHD2bNmsWNN95IXV0dl19+\nOf/3f//HiSeeyLZt29p38x6Q3lbtoJnlIW4rQTgq2blzp2v7ueeeO2L/L37xi2ZxFff511xzDddc\ncw0AN9xwAzfccMMRcZiZM2e6tvv06cPnn3/e7PzV1dVMnz6d6dOnA5CZmemyLgB+9rOfubZzc3NZ\ntmwZiYmJTJw4sWPWeRwLNIt5iOUhCMIxhCiPdtAs26pWlIcgCMcOojzaQUUzy0PcVoIgHDuI8mgH\nYnkIgnCsIsqjHUiqriAIxyqiPNqBe8C8UtxWgiAcQ4jyCBC7w0lNg8P1WdxWgiC0ZOHChUyd6rGP\nq0fKysr4z3/+43Wer511Q4kojwCxLI20xFhsQHWDg0aHrOkhCELgdCTlIUWCAWJZGunJ8TgdDqrs\nmsq6RjJS4iMsmSAcpczsjL8tDn2aP7O8zd07d+7k3HPP5ZRTTmHJkiWMGjWKa6+9lnvuuYeioiJe\nfPFFkpKSuPXWW6mtrSUpKYmXX375iB5Y1dXV3HzzzaxatQqn08nMmTO54IILms35y1/+wrZt2xg9\nejRnnXUWEyZM4IknnuDrr7/mwIEDnHbaaSxYsIC7776b2tpavv/+e+64445mhYLhQpRHgFiWR2pi\nLHX1NqrsDirq7KI8BOEoZOvWrbz33ns8//zzjBkzhjfffJPvv//e1WX3jTfe4LvvviM2NpYFCxZw\n5513Mnv27GbneOCBBzjjjDN48sknsdvtjB07ljPPPLNZtflDDz3E2rVrmzVHfOedd3j66af5/PPP\nuffee+nVqxf33Xdfs7bskUCUR4BY2VVpiXHU1BjLjkiLEkEIITPLI9aSvU+fPowYMQKAYcOGMWXK\nFJRSjBgxgl27dlFeXs7VV1/Nli1bUEphtx8ZA7Vauj/88MPYbDZXS/chQ4a0ee1HHnmEcePGMX78\neK644op230uwEOURIJbbKi0ploo4I3QkzREF4ejEW8v1u+66i9NPP50PP/yQnTt3Mnny5CPOYbV0\n79mzp18Kbd++fdhsNg4ePHjESoeRJDqk6IC4Wx4pcarZmCAIxxbl5eUcf/zxALzyyise5/jS0r1l\nG/XGxkZuuOEG3nzzTYYMGcJjjz3mcV4kEOURIE0xjzhS4o2vUdxWgnBs8uc//5k77riDiRMn4nA4\nPM6xWrqPGzeuWUv3ffv2cf755wOQlZXFxIkTGT58OLfffjsPPvggEyZMYNKkSTz22GO8+OKLbNiw\n4Yi27JFA3FYB4u62EstDEI5ecnJyWLt2reuzu2WRk5PD8uXLSUlJYfPmza7x+++/H4DJkye7XFhJ\nSUk899xzR8RhevTo0Szt9s0332x2fWslwdTUVDZu3Ogad2/LHgnE8giQCledRxwpcZblIcpDEIRj\nA1EeAdJkecSRHG9ZHuK2EgTh2ECUR4A0BcxjSbbcVmJ5CELQiaKVp48agvGdhkx5KKVeUkoVKaXW\nuo3NVErtVUqtNF/nu+27Qym1VSm1SSl1jtv4uebYVqXUX0Ilr79UuAfMLbeVWB6CEFQSExMpLi4W\nBRJEtNYUFxeTmJjYrvOEMmD+CvAU8FqL8X9prR9xH1BKDQUuB4YBPYAFSqmB5u6ngbOAQmC5UmqO\n1jp0C/P6iATMBSH09OzZk8LCQg4dOgRAQ0MD8fG+d3EI9fxolMmX+YmJifTs2dPnc3oiZMpDa/2d\nUirHx+kXAG9rreuBHUqprcBYc99WrfV2AKXU2+bciCuPSveAebwEzAUhFMTFxdGnTx/X54KCAkaN\nGuXz8aGeH40yBXIPgaBCaQ6aymOe1nq4+XkmcA1QAeQDf9RalyqlngKWaK3/a86bBXxmnuZcrfWv\nzPGrgHFa65taud71wPUA2dnZuXPnzg1I7pqaGpKTk9ucc9WHB6lp1Lx2QTcOV9Twh2+q6JJs47mf\ndAvK+cM5PxplknuO/PxolCna5kejTIHcg0VeXl6B1jrPp8la65C9gBxgrdvn7kAMRqzlAeAlc/xp\n4Bdu82YBlwCXAS+6jV8FPOnLtXNzc3Wg5Ofnt7nf4XDqnL/M071nzNONDqf+dtEy3XvGPD387s+D\ncv5wzw/HNaJtfjiu0dHnh+MaHX1+OK4RjnuwAPK1j8/3sBYJaq0PWttKqReAeebHQuAEt6k9gX3m\ndmvjEaOyvhGtITUhlhibItGMeVTWN+JwamJsKsISCoIghJawpuoqpbLdPl4EWJlYc4DLlVIJSqk+\nwABgGbAcGKCU6qOUiscIqs8Jp8yecK/xAIhRitQEQw9X1UvGlSAIRz8hszyUUm8Bk4EuSqlC4B5g\nslJqNKCBncBvALTW65RS72IEwhuBG7XWDvM8NwFfYLi7XtJarwuVzL5iZVWlJjZ9fWlJcVTWN1JR\na6ezqVQEQRCOVkKZbeWp8fysNuY/gBEHaTn+KRDZ9RZbYDVATEtsUhKWIpF0XUEQjgWkMWIAWOt2\npCU1tzyg43XW1VqzeFsxjQ2y/rogCL4j7UkCwL0pooW13dEsjx+2FvPzF5fy+prIrg0gCELHQpRH\nALQMmBvbsc32dRR2Fhvtng9UeV6DQBAEwROiPALAvSmiRZPl0bHcVqXVDQBU1ovbShAE3xHlEQDu\nqwhaWIqko61jXmwpD4l5CILgB6I8AsC9KaJFRw2Yl9aI5SEIgv+I8giAJrdVxw+Yl5iWR4MTahsk\n7iEIgm+I8ggAV53HURAwtywPgBK3bUEQhLYQ5REAHivMO6jlUVptd9sW5SEIgm+I8giASk91Hh00\n5lHipjBKxfIQBMFHRHkEgCvmkXRke5LK+o5jedQ2OKi1N8U5Sms6juyCIEQWUR5+orV2xTU8uq06\nkOXR0tIQt5UgCL4iysNPqhscODUkx8cQF9P09aW61Xk4naFbnTGYlLRQFi0/C4IgtIYoDz+p9BAs\nB4iNsZESH4NTQ3VDx7A+WloeZRLzEATBR0R5+ImnduwWrqB5B2lRYlka1sKHJRLzEATBR0R5+Imn\nYLlFU9yjYzyErRjHCZnJgFgegiD4jigPP3G1Jkk8cikU14JQHUR5WJZG3y4pxmeJeQiC4COiPPzE\nU1NEC8saqewgbivL8ujXtRMAZeK2EgTBR0R5+EmFh1UELdI62FK0VjuSft0M5SGWhyAIvhIy5aGU\nekkpVaSUWus29n9KqY1KqdVKqQ+VUunmeI5SqlYptdJ8Pet2TK5Sao1SaqtS6gmllAqVzL7Q5LZq\nI2DeQdxWluXRMyOJWAW1dgd1dmmOKAiCd0JpebwCnNtibD4wXGs9EtgM3OG2b5vWerT5+q3b+DPA\n9cAA89XynGHFtQRtWwHzDuK2siyNzJR4UhOMPwVpUSIIgi+ETHlorb8DSlqMfam1tp6sS4CebZ1D\nKZUNpGmtF2utNfAacGEo5PUVT9XlFh2ts24z5RFvKo/qjiG7IAiRRRnP5BCdXKkcYJ7WeriHfXOB\nd7TW/zXnrcOwRiqAv2mt/6eUygMe0lqfaR4zCZihtZ7ayvWux7BSyM7Ozp07d25ActfU1JCcnOxx\n3yOLy1hcWMcfxndm4glJzeYv2F7DMwUVnJGTxI1jOgd0/nDN11pz+eyDNGp48+Lu3L/wMBtKHNxz\nagYjuydERKZwzo9GmaJtfjTKFG3zo1GmQO7BIi8vr0BrnefTZK11yF5ADrDWw/hfgQ9pUl4JQJa5\nnQvsAdKAMcACt+MmAXN9uXZubq4OlPz8/Fb3/eLFJbr3jHl64aaiI+bPW7VP954xT//29daP93b+\ncM2vqG3QvWfM00Pu+kxrrfXPnpive8+Yp+es3BsxmcI5PxzX6Ojzw3GNjj4/HNcIxz1YAPnax+f7\nkb6XEKOUuhqYCkwxhUVrXQ/Um9sFSqltwECgkOaurZ7AvvBK3Jy26jxcbqsOkG1luacykuMBSDNj\nHlIoKAiCL4Q1VVcpdS4wA5iuta5xG++qlIoxt/tiBMa3a633A5VKqfFmltUvgY/DKXNLfAqYd4DO\nulaabmaKoTw6mTGPEol5CILgAyGzPJRSbwGTgS5KqULgHozsqgRgvplxu0QbmVWnAvcppRoBB/Bb\nrbUVbL8BI3MrCfjMfEWM1hojgntvq+h/AFtpuhkpzS0PybYSBMEXQqY8tNZXeBie1crc2cDsVvbl\nA0cE3COB1rrtxogdqD2JK9Mq2bgPSdUVBMEfpMLcD+obnTQ4nMTH2kiMizlif6pbnYcOYRZbMLCU\nhGV5uFJ1pUWJIAg+IMrDD9qqLgeIj7WRFBeDw6mpaYjuSm3L8shyua2Mwn1ZTVAQBF8Q5eEHrr5W\nHuIdFk0rCkZ30Lx1y0OUhyAI3hHl4QdWplWqh0wri44SNG+KeZjKw4p5iOUhCIIPiPLwg7ZqPCw6\nStDcVedhWh7JsYpYm6K6wUF9Y3S73ARBiDyiPPygrRoPiw5jebSo81BKkW5aIbKuhyAI3hDl4Qfe\nAubu+6K9UNBV52EqDGPbkF3W9RAEwRuiPPzACoK36bbqAC1KnE7tCoynJzcpQsuFJUFzQRC8IcrD\nD5pWEWzd8nDVekRxzKO81o5TG0owLqbpT8CyPKQtuyAI3hDl4Qe+Bcyjfx3zlvEOi0yxPARB8BFR\nHn7gW8A8+t1WLftaWVjxD0nXFQTBG6I8/KCtpogWHSFg3rLGw8KlPCTbShAEL4jy8AOfsq06QKpu\ny+pyCwmYC4LgK6I8/MAnt1UHKBK01uxoGfNwBcxFeQiC4AVRHn7gn+URvW4rl+XR0m2VIjEPQRB8\nQ5SHHzSl6vrSGDGaLQ8r26q5EpSYhyAIviLKw0caGp3U2Z3E2BRJHtbysHAPmEfrmh6lLuWR0Gw8\nU7KtBEHwEVEePlLp1o7dXELXI4lxMcTH2mhwOKlvdIZLPL9oqvNobnmkJsZiU1BZ30hDlMouCEJ0\nIMrDR3wJllukRXmVuae+VgA2m3KNldWK9SEIQuuEVHkopV5SShUppda6jWUqpeYrpbaY7xnmuFJK\nPaGU2qqUWq2UOsntmKvN+VuUUleHUubW8CVYbhHthYJNMY/4I/Y1Bc2jU3ZBEKIDv5WHUipDKTXS\nx+mvAOe2GPsL8JXWegDwlfkZ4DxggPm6HnjGvF4mcA8wDhgL3GMpnHBitRtpq0DQwlIw5VFYKGh3\nOKmoa8SmPCtCSdcVBMEXfFIeSqmFSqk080G+CnhZKfWYt+O01t8BJS2GLwBeNbdfBS50G39NGywB\n0pVS2cA5wHytdYnWuhSYz5EKKeQ0LUHr3fKwFEw0Wh7WWh0ZyfHYbEfGbqRFiSAIvqB8yQhSSq3Q\nWp+olPoVcILW+h6l1GqttVcLRCmVA8zTWg83P5dprdPd9pdqrTOUUvOAh7TW35vjXwEzgMlAotb6\n7+b4XUCt1voRD9e6HsNqITs7O3fu3Lle780TNTU1JCcnNxtbsL2GZwoqOCMniRvHdG5z/qOLy1hU\nWMdt4zpzSq8kn87vrzyBzt9dbue2L4vpmRrD4+d2PeKY/+SX89WOWn6Tm8bZfVu/ZiTvIRjzo1Gm\naJsfjTJF2/xolCmQe7DIy8sr0Frn+TRZa+31BawBsoEvgTHm2Gofj80B1rp9Lmuxv9R8/wQ4xW38\nKyAXuB34m9v4XcAfvV03NzdXB0p+fv4RY899u1X3njFP3z93ndf5f5m9WveeMU+/vninz+f3V55A\n5y/aelj3njFPX/bMIo/H/OPTDbr3jHn6qa+3hE2mSMwPxzU6+vxwXKOjzw/HNcJxDxZAvvbhua61\n9jnmcS/wBbBVa71cKdUX2OLjsS05aLqjMN+LzPFC4AS3eT2BfW2MhxWr0WFqBw+YN/W18nwfTWt6\niNtKEITW8VV57Ndaj9Ra/w5Aa70d8BrzaIU5gJUxdTXwsdv4L82sq/FAudZ6P4bSOtsM1GcAZ5tj\nYaXSh+pyi2jurNtWphU0ZVuVSMBcEIQ28FV5POnjWDOUUm8Bi4FBSqlCpdR1wEPAWUqpLcBZ5meA\nT4HtwFbgBcBSVCXA/cBy83WfORZWXHUePlke0dtZt7UaDwsJmAuC4Att/oxWSp0MTAC6KqX+4LYr\nDWi9R4eJ1vqKVnZN8TBXAze2cp6XgJe8XS+UuOo8fCoSjN7Ouq2tImhhVZ1LfytBENrCmw8mHuhk\nzkt1G68ALg2VUNFIRZ33JWgtonkpWm+WR3qyrOkhCIJ32nwSaq2/Bb5VSr2itd4VJpmikqYiwY4d\nMC+p8byWh4U0RxQEwRe8/4w2SFBKPY+Rdus6Rmt9RiiEikaa3Fb+BMyjT3m0tn65RVpSHDZlxHga\nHU5iY6T9mSAIR+Kr8ngPeBZ4EXCETpzoxa/GiFG8IJSVbZXVivKIsSk6J8VRWmOnrNZOl04JHucJ\ngnBs46vyaNRaPxNSSaKYRoeTqvpGlIJO8R3c8mhl/XJ3MlLiKa2xU1rdIMpDEASP+OqTmKuU+p1S\nKtvsiptp9rk6JqiqNyyITgmxHvtBtSQxzkZcjKK+0UmdPXoMtTq7g5oGB/ExNlLiW0+WkxUFBUHw\nhq+Wh1XUd7vbmAb6Blec6KTSjxoPAKUUaYlxFFc3UFnXSGIbKw+GE/fq8rYWtLKUR4kEzQVBaAWf\nlIfWuk+oBYlmyv2o8bBITYw1lYedrqnR4fop8ZKma2G1KCmTdF1BEFrBJ+WhlPqlp3Gt9WvBFSc6\n8afGwyIag+bWAk+tpelaZEqLEkEQvODr03CM23YiRoX4j8CxoTxqfc+0sojGoHmJD8FycCsUFLeV\nIAit4Kvb6mb3z0qpzsDrIZEoCrGaIvqyiqBFNBYKWsog04vbSlqUCILgjUArwGowlos9JvCnKaJF\nNHbWLfZSIGghzREFQfCGrzGPuRjZVWA0RBwCvBsqoaINf5oiWkRjZ90my6Pt+7CUi/S3EgShNXz1\nw7gv+doI7NJaF4ZAnqgkkIB5aoIxtzKKlIevMQ+p8xAEwRs+ua3MBokbMTrrZgDH1E9Sf+s8wM3y\niCK3VamXhaAsXKsJiuUhCEIr+KQ8lFI/BZYBlwE/BZYqpY6Zluz+NEW0iMaAua91Hp2T4lDKqG9x\nOHWbcwVBODbx9Wn4V2CM1roIQCnVFVgAvB8qwaKJJrdVx07VLfWyEJRFbIyNtMQ4ymvtlNfavc4X\nBOHYw9dsK5ulOEyK/Ti2wxNQnUeUFQlqrX0uEnSfIy1KBEHwhK+Wx+dKqS+At8zPP8NYc/yYoCKQ\nOo8oszyqGxw0OJwkx8f41GsrXVqUCILQBt7WMO8PdNda366Uuhg4BVDAYuCNQC6olBoEvOM21Be4\nG0gHfg0cMsfv1Fp/ah5zB3Adxloit2itvwjk2oESWMA8umIe3pafbUmmNEcUBKENvP2U/jdwJ4DW\n+gPgAwClVJ65b5q/F9RabwJGm+eJAfYCHwLXAv/SWrunBaOUGgpcDgwDegALlFIDtdZh6XXudOqA\nKsxTo2wd8xIfM60sZC1zQRDawlvcIkdrvbrloNY6H2NJ2vYyBdjmZX30C4C3tdb1WusdwFZgbBCu\n7RPVDY04NaTEx/i1JGtKfAw2BTUNDuwOZwgl9A1fazwspEWJIAhtobRuPRVTKbVVa93f330+X1yp\nl4AftdZPKaVmAtcAFUA+8EetdalS6ilgidb6v+Yxs4DPtNZHZHoppa4HrgfIzs7OnTt3bkBy1dTU\nkJycDMChGge//eQQWUk2np/azet8d67++CBVDZqXp3cjLcHmdb4v8gQ6f+GuWp5cVs6pvRL5/bh0\nr8d8sLGKN9ZUccGgFH45MjUkMkVyfjTKFG3zo1GmaJsfjTIFcg8WeXl5BVrrPJ8ma61bfWEEyH/t\nYfw64J22jvX2AuKBwxgxFYDuGK1PbMADwEvm+NPAL9yOmwVc4u38ubm5OlDy8/Nd2xv2l+veM+bp\nsx5b6NN8dyb982vde8Y8veNQlU/z/T2/P/Nf+G6b7j1jnp45Z61Px7y5dJfuPWOevv29lSGTKZLz\nw3GNjj4/HNfo6PPDcY1w3IMFkK99fIZ7c+LfCnyolLoSKLCUk/ngv8gn7dQ652FYHQdNJXbQ2qGU\negGYZ34sBE5wO64nsK+d1/YZV5quH8Fyi2gKmrtqPHwMmDetJhh52QVBiD7aVB7mA32CUup0YLg5\n/InW+usgXPsKmlJ/UUpla633mx8vAtaa23OAN5VSj2EEzAdgVLuHhUCaIlpEU2ddSwn4GvOQ1QQF\nQWgLX9fz+Ab4JlgXVUolA2cBv3EbflgpNRqje+9Oa5/Wep1S6l1gPUZTxht1mDKtILCmiBZproyr\nyP9697WvlYWsJigIQlv4/0QMAlrrGiCrxdhVbcx/ACMOEnYsyyM1ALeVldobDW4rX/taWVipumWS\nbSUIggeOmRYjgeIqEPSjKaJFNHXWLfGxr5WFe4W5U5ojCoLQAlEeXgikKaKFK+YRBZaHq8I8xbf7\niIuxkZocgRjXAAAgAElEQVQYi1NHh/yCIEQXojy8EEhTRAtXtlWE+1s5ndqVbeWr2wqkOaIgCK0j\nysMLwbE8Iuu2qqiz49RGDCbOjyr5DGlRIghCK4jy8IIV8/Cnr5VFU8wjspaHv32tLFwrCkqthyAI\nLRDl4QWX5RGA28pSOJFujujrIlAtyZB0XUEQWkGUhxdcRYLtqPOIdMDZKhD0tbrcIsOVrivKQxCE\n5ojy8EJFXccPmDdlWvmnPJoC5uK2EgShOaI82kDrwNbysIiWpWj9rfGwkNUEBUFoDVEebVBnd2J3\naBJibSTEel+6tSWd4mNRCqrqG2mM4Joe/q4iaCGrCQqC0BqiPNqgPcFyAJtNkZpgWCxV9ZGzPpqy\nrfy7D2lRIghCa4jyaIP2BMstoqFFSSAFgiDNEQVBaB1RHm1QURd4U0SL1CjIuGpvnYfEPARBaIko\njzZoT6aVRVoUdNa11iH3N9sq3VVhbpfmiIIgNEOURxscLW6r4qp6wP86j/hYG6kJsTicOuKFjoIg\nRBeiPNogOJZHZN1WdoeTirpGbCqw+0g3g+zS30oQBHdEebRBk+XRDuUR4UJBK1MqPTmeGJvy+3hX\nuq4oD0EQ3BDl0QbtaYpoEenOuk2ZVoEpwHRpUSIIggdEebRBe+s83I+N1DrmgWZaWUiLEkEQPBEx\n5aGU2qmUWqOUWqmUyjfHMpVS85VSW8z3DHNcKaWeUEptVUqtVkqdFA4ZgxEwd61jHqGAeaDV5RbS\nokQQBE9E2vI4XWs9WmudZ37+C/CV1noA8JX5GeA8YID5uh54JhzCHQ0BcytWkdUpQMtDWpQIguCB\nSCuPllwAvGpuvwpc6Db+mjZYAqQrpbJDLUxwUnUjGzBvt+WR0lTrIQiCYKG0jkzxl1JqB1AKaOA5\nrfXzSqkyrXW625xSrXWGUmoe8JDW+ntz/CtghtY6v8U5r8ewTMjOzs6dO3duQLLV1NSQnJzMLZ8f\nYm+lg3+f04UT0lpXINZ8T2wvtXP7gmJyOsfy6NldvM739/ze5r+8soJ5W2q4elQq0wem+H2NRXvq\neHRJGeOOT+DPEzKCIlM0zI9GmaJtfjTKFG3zo1GmQO7BIi8vr8DNE9Q2WuuIvIAe5ns3YBVwKlDW\nYk6p+f4JcIrb+FdAblvnz83N1YGSn5+vtdY67+/zde8Z8/SB8lqf5ntid3G17j1jnp7wj698mu/v\n+b3Nv/XtFbr3jHn6/fw9AV3jh62HdO8Z8/Rlzy4KmkzRMD8c1wjp/K8f1LUPD9O6Yn/I5AnkmGNt\nfjiuEY57sADytY/P8Ii5rbTW+8z3IuBDYCxw0HJHme9F5vRC4AS3w3sC+0ItY1DqPCId8/CWbVW6\nC2adQ8a+hR53y2qCUYjTAUufJbF6D6x8M9LSCCFEN/1gjjoiojyUUilKqVRrGzgbWAvMAa42p10N\nfGxuzwF+aWZdjQfKtdb7QyljfaOD+kYncTGKxLjAv6ZOiU0t2SPRH8pV59Ga8vjuYdizhO7b3vG4\nW1J1o5D9q6CuzNheOzuysgghw+HUnPf4//jLVyVRqUACjwS3j+7Ah0opS4Y3tdafK6WWA+8qpa4D\ndgOXmfM/Bc4HtgI1wLWhFrCpQDAOU86AiLEpOiXEUlXfSFVDY7usmEBwWR6eAuaVB2D1uwAkl2+B\nugpITGs2xT1VV2vdru9CCBLbv2naPrgWijZAtyGRk0cICRv2V7DxQCUA2w9X069rpwhL1JyIKA+t\n9XZglIfxYmCKh3EN3BgG0VwEI9PKIi3RUB4VtfawK4+m9cs9XHfpc+Aw9ivthD1LYcBZzaYkxMaQ\nEh9DdYODyvrwKz/BA9sXAtCQ2JX4ukOw5n2YcldkZRKCztIdJa7t5TtKok55RFuqbtQQjBoPi0h1\n1q2zO6hucBAXY1g/zaivgvxZxnbOJON95/cez+NqUSKuq8jTUAO7lwCKPcN+Z4ytfR+i0K0htI9l\nO4rdtkvamBkZRHm0QjCC5RaRCppbTREzkuOPdDeteB3qyuGE8TDhFmNs1w8ezyMrCkYRuxcb1mL2\nKMqyT4FOx0HpTthbEGnJhCCitW6mMJbtFOXRYWhaRTAIbqsIFQoWV5vreLQMljsaYfF/jO0JN0Ov\n8WhssG8FNFQfcR4r7iFt2aMAK97RdzKoGBh+ifF5zfuRkkgIAVuLqiitsdMtNYHkOEVhaS37y2sj\nLVYzRHm0ghUwD6blEe4FlUqrmyyPZqz/CMp3Q2Y/GHQ+JKZR03kAOBuNuEcLLOVTKi1KIo8Z76Df\n6cb7CFN5rPvASOEVjgqseMe4vlkMyjKeH9HmuhLl0Qout1VS+y2P1AgtRWu5mZpZHlrDoieM7Qk3\ngc34E6jMGmmM7TzSdZWRLC1KooKqQ3BgDcQmGu5GgB4nQUYfqDoIO/8XWfmEoGEpirF9Mhna1fj/\ntzzKXFeiPFrB1Y49GJZHhALmHjOtdv7PqBNI7gKjrnANV2WZyW8e4h4u5SGWR2TZ8a3x3utk5m8p\n5/kfy6lrdMKIS41xcV0dFbjHO8b1yWRoF+P/n1geHQTrQR+UmEeEAuYeazwWPWm8j70e4pJcw1VZ\nIwFlBF7tzX2rmabykYB5hDHjHbrvZGbOWccX22r5bO1+GGGWQ22YA431kZNPCAp7Smo5UFFHRnIc\n/bt2ol9GHPGxNjYfrIqqH3CiPFqhMggLQVlEKmBe2tJtVbQBtnwJsUkw5lfN5jriOsFxw41MnsLl\nzfbJaoJRgNawbSEAu9LHsbfMUPDz1x+EroOg+wgje27rgggKKQSDpWaK7picTGw2RVyMYvQJRr/Y\n/F2lkRStGaI8WqEiBAHzSFkertYki54y3k+8ElKyjjyg9ynGe4u4R1OLElEeEaN4G1QUQnIWnx3q\n4hr+dtMh6hsdTYFzcV11eNzjHRZjc4ztaIp7iPJohaaAeQeOebhbHhX7YfU7gILxv/N8QM5E471F\nsWBTixIJmEcMK0W3z2l8s+kwADEKqhscLNpW3JSyu+kzowBU6LBYNR3j+jT9wBtjKpJoinuI8miF\npvXLgxfzqKwPt+Xhlqq77Dlw2mHINMjq5/mAXhOM98LlYK9zDYvlEQWYKbo1PSdRsLuUWJvivP7G\nmg3z1x+E9F5GBlZjLWz6NIKCCu3hQHkdu4pr6JQQy5DsVNd4bu8MbArW7i2npiEyS1q3RJRHK7g3\nRmwvkVrH3AquZcU1wPKXjEGrmtwTKVnQbRg46ptVLDe1ZbdHZXfPox5HI+ww0nB/YAQOp2ZMTian\n9TYSHuavP2h0bJasqw6PZXXk9s4gNsYGPzxOt+2z6ZQQy7AenWl0albsLouwlAaiPFohqI0Rk8If\n89BaN61fvvkdqC+HXifDCWPaPtByXbml7CbGxZAUF0ODw0l1gxSihZ19K4x/v8y+fLrb+Fs6Y3A3\n+qTHcnx6Eocq61lVWAZDLzSqzrd9BTXR494QfMfqZzW2TyYc3gLz7+aEdU9D2W7G5ESX60qUhwcc\nTk11gwObgpT4IBYJ1obvl3tNg4OGRiepcZr45c8agxNu9n5gb89xjwyrRUkUuq6e/24bV354kDWF\n5ZEWJTSYLitn39NZuMlYH+30wd1QSnHmkG6A6brq1NVoW+JshPUfez6XENW413ew9oOmHes+ZGwf\nYxnoaAmai/LwQI3deMCnJsZhs7V//Yq4GBvJ8TE4NWH75W7FJy5OLIDyPZDVHwae5/1AS3nsWQaN\nTYrCytiKtv5Whyrr+df8LdQ1av67ZFekxQkNZrB8R+oYSmvs9MpMpl9XYz36s4YeB5jKA8R11YEp\nqW5g88EqEmJtjOjZ2Wg5Y7H2A/JMy2PF7jLsDmeEpGxClIcHqu3GP0wwCgQtXOm6Yar1MB7ymquc\n5i/Qk5takbRJp67QZZAReN23wjXs6m8VZRlXzyzcRq3dUMifrd1vpK0eTdRXGYpc2fi0qj9guKys\nLsnj+maSmhjLlqIqdhyuhsFTISbBcDtWhHylZiGIWBbFib3SSSjeBIc2QmI6jpgk2L+SLvWF9O2a\nQq3dwdq9kbeyRXl4wLI8grnwkZW1Fa7miCXVDZxsW09/xzZI6dqsFYlXXHGPJtdVehS2KNlfXst/\nlxrWRmaijYq6Rv63+XCEpQoyuxYZWXI9TuSzrUYG3OmDu7l2x8XYOGOw5bo6YKwEOfAcQDd3ewhR\nT1N9Rxas+9AYHDqdsuPM/4/rPjTcWURH3EOUhwcsyyMYaboWqWEuFCytaeA3MfOMD2Ovh7hE3w92\nxT2aguaZZswjmtJ1n/p6Kw2NTn4yMpvzBhhpq3NWHWW/ts14R9Xxk1i/v4KkuBjXA8TirKHdAU+u\nq/fCJaUQBFzxjpyMJpfVsIsp7THZ2F73oStoHg1xj7ArD6XUCUqpb5RSG5RS65RSvzfHZyql9iql\nVpqv892OuUMptVUptUkpdU6oZaxuCIHlkRjeFiXOA+uZHLMKu0o4ohWJV3LMSvM9S400UaKvRcme\nkhreWb4Hm4LbzhzAKScYynH++oNRkwcfFMx4xzJldD2e2D+LxLiYZlNOG9iVuBhFwa5SiqvqYcDZ\nkJAG+1calelC1FNZZ2fdvnJibYqTEguheKvRvDRnEhVd8yChMxxcy8lpRjbW8p2lRnp2BImE5dEI\n/FFrPQQYD9yolBpq7vuX1nq0+foUwNx3OTAMOBf4j1IqxtOJg0WT5RFMt1V4LY+B218BYP1x0yE5\ns+3JLUk9zgiwN1QZHXiJvtUEH/9qC41OzYUnHk//bql0S4nlxF7p1NodfLWhKNLiBYfKA1C0HuKS\neb8oG2jusrJITYzj5H5dcGr4amOR0fBy8FRjpwTOOwQFu0pxahjRszNJm8w45dDpEBOLjomHIca/\n53F7PiW7cyLltXa2FEW2k0DYlYfWer/W+kdzuxLYABzfxiEXAG9rreu11juArcDYUMrYlG0VioB5\n6H8Vx9UeYsjhL3BoxfYB1wR2EpfryihOa1pNMPIB822Hqvjgx0JibYrfTxngGp82sgcAc48W19V2\nowW7s9fJLNxWAcDpg45UHgBnH+G6snpdvSfrm3cAXPGOnIymWNWwi5smmNtq3YeM6W2k7EZ6adrg\nPR0DQCmVA5wILAUmAjcppX4J5GNYJ6UYimWJ22GFtKJslFLXA9cDZGdnU1AQ2LrOZTVGW+vq0kM+\nnaOmpsbrvOqySgA2bttF3xybX7L5cn53um15j1jdyCfOsRRWqIDuIVMfTx+gfPWnbE2aRPFB4zvZ\nfaCYgoICv2UK5vzHlpTh1DClTyKHd27k8E5jfi/lQAFfbzzId4uXkxLf9m+jSN6DL/NzVrxPFrC8\noS81DQ56d45l/7b17Pcwv7uZZfbtpoMsWppPgi2NkfHpxBVvYf0371DbeYDf8gTjHo72+cG6xtdr\nDHfU4MqlULYLe0Imq4sToMT8v+ZMZVRcGrGHNzE8bR1zyOTzgq0MjTsyQSSQewiEiCkPpVQnYDZw\nq9a6Qin1DHA/oM33R4H/B3gqtPD4U0pr/TzwPEBeXp7Ozc0NSLZZK74BGhjUtze5uX28zi8oKMDb\ntfKrtsHGjXTK7Epycq3X+f6e30V9JY2ffQbAC41TmTFiMLn9PHTQ9XaN/sfBigfpXL6B3BNHk7C/\nCr77nsaYRHJzc/2Tyd97aGP+hv0V/LDnf8TH2Lj3pyfTIz3JNX9Sbi7j1y9h8fZiDsYdx2W5J4RF\nppDM1xoWrgHgx06TAPjJib3JzR3c6vlHrfyeVYXlVHXqyYRhx0HRZbD8BYY2roPcy/2Wp9330BZO\nJ+wtYO2OUobnToq8PAHOD8Y16uwOtn/wJUrB+albAIgbdRm5eWObz99/Efz4Kpekb+ZBxrOtHE46\n6SRX2nZ77iEQIpJtpZSKw1Acb2itPwDQWh/UWju01k7gBZpcU4WA+1OgJxBSv0SNFfMIptsqKUzr\nmOe/RGxjNatjhrJS92++BK0/dD4eMnKgvgIOrG4qEoxwttW/5m8G4OfjerkUhzvTRxuuqw6fdXVo\nE1Tuh5RuvLPLaJB3hod4hzutZl2tnW08rKOBoo2wYCb8ezjMOpNhX18Nn94OtdHRrykSrNxTRoPD\nyeDuqSRsmmMMDr/4yIlm5+TMnfNIT4rlQEUdhaW1R84LE5HItlLALGCD1voxt/Fst2kXAWvN7TnA\n5UqpBKVUH2AAsCyUMla7VZgHi7CsY15bBv8zvtIX9YVAiyVo/cVtfQ9Xe5Kahog1R1xTWM6X6w+S\nGGfjd6d77gx87rDjiLUpFm0r5nBVB15Vz0zRrTz+FHaW1JKeHMeJvTLaPMSqNv96YxEOp4aeY6Hz\nCVCxF/YsafPYkFJ9GJY8C8+dBv8ZB9//y5Ap7XjDr7DseXgqD1a+dUzGZ6x4xyXd9hndIFJ7GP92\nLck5BVK6oUq2c3F2cbNjI0EkLI+JwFXAGS3Sch9WSq1RSq0GTgduA9BarwPeBdYDnwM3aq1DWkZc\nE4I6j7AEzH94HOrKqMgcxSd1w4GmjrgB4dYkMSkuhoRYG/WNTldFd7h5dP4mAK4+OYduqZ7rVjJS\n4jl1YFccTs1na/Z7nNMhMFN0f4wx1pY/bWBXYry0yhnYvRO9s5Iprm7gx92lRkcBa52PcGdd2etg\n3Ufw5uXw6CD4fIaROpyQBif9Eq79DG5dy4ZTnzMadlYfgo9+Cy+fDwfXhVfWCGMpgClOs65q2EWe\nu0HYYmDoBQBcELsYiGy9RySyrb7XWiut9Uj3tFyt9VVa6xHm+HSt9X63Yx7QWvfTWg/SWn8WahlD\nUucR6lTdin2w5BkAtg74FQ6nYe3ExbTjn9jKuNq1CKV1RFuU5O8sYeGmQ6TEx/Cb01pZj8Rk2ijD\niJ27qoMqD4fd1Zjy/VLjXr25rACUUpw1xHBdfbnugDFoua7WfWg0TAwlWpNSshbm/h4eGQjvXQ2b\nPzOsiQHnwKUvw582w/QnofcEsNmoTetnKJILnzHqGnYvgmcnwRd/hbqK0MobBdgdTgp2laJw0uvA\nl8agJ5eVhblvcMlXgI5oxpVUmHvAqvPoHMw6j1AXCS58yOhHNWQ6e5MGAQQe77DI6G24PerKoGhd\nRFuUPPqlEeu47pQ+Xu/rrKHHkRBrY9nOEvaXR8AnrDWs+C+ph38M7PjCfGiowpk1gM/3xGBThuXh\nC+5xD601dB9u9CqrLSHtUAgzcCoPwDMTGPzDLVDwitFCPnsUnPsQ/HEjXPmu8eCLOzJOhVIw+udw\ncwGM+TWgYfFT8NQYw2I6il1Za/eWU2t3cEHGbmKqDhiLeh3fRrD7hPGQ2oOEqr2Mj9vO9kPVHKqM\njHtWlIcHqkPS28qyPELw6+/QZljxX2Mthyl3U9lgKL92uawscpriHpkpkWlRsmjrYRZvLyYtMZbr\nJvX1Or9TQixTzFbl8yJgfej598DHN9J/8Z9h/Rz/T2DGO/ZkjMPu0JzUK8OluL2R2zuDjOQ4dhbX\nsLWoyngwj7gMgMx9X/svi698+icoWk9DQhZM/D3csBh+8x2MvwE6ebeaAEhKh588Ar/+Bo7Pg6oD\nMPs6eG26kUBwFGK5rH6WnG8MDLvI+DdrDZsNhhnxzKs7Gz9O8iNkfYjyaIHTqak1lUenIGZbWQHz\nyroQrOnx9X2gHXDSVdBlABX1hvJot+UBbq6r75ssjzBWmWuteeRL48Hxm9P6+WwNTh9lFgyuDnPW\n1ff/Ri16HAAbTpzvXwfb/Hxom/GObxqMxgueqspbIzbGxhmDTdeVlXVlujrS938PDTX+yeILG+bB\nhrkQ34mNk56Gs+6D7kO9H9caPUbDdfNh2hOQlAk7voNnJsL8e7A1Ri67KBQs21GCDScnVhkFoc0K\nA1vDnHNKww8onBFzXYnyaEFVQyMa49ertwClPyTEGgFnu0MT1CU99iw3/uPGJsFpfwGgsj6YlkdT\n3CPLTCAIp9tq4aZD/Li7jMyUeK6ZkOPzcZMHdaNTQiyrC8uNVuXhoOBVWHAPThS3NNzIS43nYnM2\noN++0mir7gt1FVCYj1YxvLLPyFD3Jd7hztnDWiiPrH7Q4yRiHLWw+h2/zuWVugoj1RZgyt3Yk/yT\ntVVsNsi92nBlnXS1Ea/54d8M+fbXRvbWUYDDacQsxtk2kFhfDBl9DFefN3rmQedepDYUkac2Ryxo\nLsqjBcFcfrYlluvKiqm0G61hwT3G9vgbIM0IFFeYAf/M9qTpWmT0MVIHa4rpRyEQvoC5u9Xxu8n9\nSEnw/d8kMS7G1bJjXjhqPtbPgXm3AnCP/WqK+07nxYSrea/xVJS9Bt64FA6s9XISjEC5dlDTdTQ7\nq2LI7pzI4ONS/RJl0oAuJMTaWLWnjIMVRht3xt9gvH/xV8PNGSy+uhcq9xluJn8bcPpCciZMfwJ+\ntQC6DiGxZh98fNNREQfZdKCSyrpGfpa03BgYfnHbLisLpVyuq+mxS1i/r4LKMC5xbSHKowVWKm0w\nmyJaWArJiqm0my3zjUV/kjIMP7OJy/IIhttKKZf1MbDeaJIYLrfVF+sOsG5fBd1SE/jF+N5+Hz/N\nrWAwpLUp274xfPPayWP2S3lXncuDF43gd2MzuMPxa75wjoG6cnj9Iu9dbs14x5qEk4Cm5Wb9ITk+\nlkkDugCwYINVMHgZxcdPAXs1vH8t2IPg/tm9FJbPAlus8YC3hbBfac88uPI9GmNTjAyu5S+G7lph\nYtmOYmJp5EyWGgO+uKwsTFfk1NjloI2MrXAjyqMFlgYPZlNEC5fl0RAEy8PpMCp1ASb9yQg2mrhi\nHsFwW4Er7tGrwlhZMByWh0NrHjOryW86o/8Rbch94ZT+XchIjmNLURWbDlYGW0SDwgJ4+0pwNDA7\nbipPOC7i5jP60zsrhSFd4vnFyf24peFGVsSOguoieO3Ctlf4M+MdH1UOBFpvhOiNI6rNlWL3yNsg\nsy8cXAtf3BnQeV00NhgpuWjjh0v3Ye07ny+kn8DuUX80tr/8GxxcH/prhpBlO0uYYFtHJ0c5dBno\n33eYPRoy+pChSxln2xAR15UojxZY2VDBzLSysM4ZFMtjzXtQtA7Seh7hLqgIpuUBroyrriX5gA5L\nzGPRnjo2H6zi+PQkfjam7R5VrREXY+O8EYYrb87KELiuijbCG5eAvZoNXc/jT5WX079bKtef2lSH\ncvs5g+iW2Zkrq37PgdThUL7bUCDVxUfKW3sIDm/GGZfC7IPdiY+1MbG/975knpgypDtKwaKtxVTV\nG3/TzthkuOwViImH/Jfat9LgD4/DoQ2GMjr19sDP4yelPSbDib+AxjrD2guGBRUBtNYs21HCT2xu\nVoc/FqZSLutjmm0xy3eI5RFxXDGPULitzHPWtDfmYa+Dr/9ubJ/x1yNWCbRSdYOSbQXG2h4p3Yiv\nK6af2hdyt1Wjw8k764y1Cm6Z0p+E2MDdIa427auD7Loq2224oWpLqc45k0v2XYnGxgMXDic+tum/\nVUpCLA9dPJIaEplWciv1mYPg8CZD6bQogks7bNRhHMzMw65jGd83i+T4wCzgLp0SyO2VQYPDybeb\nDjXtyB4F5zxobM/9PZRs9//kh7fAdw8b29Me91y7EUrO/Sdk9jPWOpl/T3ivHSS2H66mvKqG82LN\neMewi/w/idk94NyYZawtPEx9Y3g7P4jyaIFVAR6SgHmwYh75s4weON2GwsifHbE7qKm6YMY9DOtj\nnG1jyC2PD1bsZX+Vg5ysZC4+qWe7zjW2Tybd0xLYU1LLyj1Bar5XZbqfKvehe53MjfZbqHHYuDS3\nJ+P6HmkpTOzfhSvG9uKQI5nf8Dd0Rg7sWwFv/7zZL+fUQ0be/vfOEQCcMci3wsDWaHJdHWi+Y8yv\nYMh0o+nle9dCox9FZk6noXQcDYYF0OfUdskYEAmd4NJZYIuDZc/B5i/CL0M7WbajhFNsa0ij2vh/\n3G2w94Na0m0odBlEpqoiz7mG1YXlwRe0DUR5tMAKmAezKaKFdc6a9sQ86srhu0eM7TNnegxSVgY7\n5gGuoPk424aQxjy+2VjEzDnrAM2tZw5sX3sVIMam+MkIa5GoIBQM1pXDfy+Bkm1w3Ag+Gf5vFm6r\nIiM5jjvPH9LqYXeeP5jszoks3BfD24OfhE7HGQttvXet0Y5Ea5fl8d9DRiGkVa8RKJby+HpjEXaH\n29+cUkaLkPReRr8pf369r3jdSNJI6Qpn3d8u+RxObTRwDIQeJ8KUu4ztj34HlQfbJUu4WbajhKkx\nRn8qvwLl7ri5rqbaloS9SaIojxZYAfNgNkW0sM7ZLsvjhyegtgR6TTDWqm5Bo8NJlV1jU0F2vZkd\ndsfbNlBrb6S+MfjZS/9dsotXX3uR2dzOpsRruWD7TDN1tX3Xstq0z1u9L/CHFaAc9fDWFXBgNWT2\npeKSd5j55R4A7jh/SJuWXmpiHA9ebFgUM/9XzZ6pbxpZcps/g49vhINriasvpSGpO6vqutOvawq9\nspIDlhWgb9dO9OuaQkVdI8tbPliS0uHSV4xMqaXPGIV+3qg8CPPNB/a5D/m/vLHJ9kNV3Dt3HaPv\n/ZKrPiri8ucX88gXm/hmYxHl/vwwOflm6HMa1Bw2mipGS9t5H1i5/QBn28x2MW31svKGqXjOjVnO\nj9sPeJkcXCK6kmA00uS2CmXAPMA/8soDsPhpY/usez0G2MrMmE16cnxQixzpOgiSu9C95jC91UEq\nG4JUDIZR1f/SB/MYuOqfvBK/pmnHmneNV1Z/oxPrqJ9DJ/9dOaN6dqZXZjK7S2pYtqOEk31YHOsI\nHHb6FtwHBxcbdS9XfcQ/vinhcFUDY/tkclmud/fa6YO6cclJPZn9YyG3fVPPuz9/H9tr043CPbOI\ncFNyLpQqvwsDW+PsYcfxzMJtfLn+INNarr/ZMxfOvBe+/Ct8/DvIHmlYI63x+QzD8up/VlO3Xh9x\nODULNxXx6uJdfLf5ULN9S7aXsGR7k3Ib0K0TeTkZnNQrg7ycTHKykj2nK9tscNFz8MwEo4p/6TNw\n8j6LpAgAABp+SURBVI1+yRUJiqod9K9cSmp8Lfq4kaistht9tknXgdi7DCXt8HqSdn+LwzkxeIJ6\nQZRHC0Ja5+FK1Q3w1++3/zSaHw6eCid4XsbdikdY628EDaWMTqgb5jDOtoHKhuCkZtaV7KXgldu5\ntvxTYmI0DbGpxJ/+Z9baT2C4c53Rs6t4K8y/G766HwafbyiSvmd4blvtUXTFtFHZPP3NNuau3ue/\n8nDY4eObSD+42LAWrvqQgopU3lq2lrgYxYMXDfe5FuPuqUP5bssh8neV8uruoVx7xZvwxmVQugOA\nT6qNppb+tCRpi7OGdueZhduYv/4gU3ukHTnh5BsN99nmz+H96+DaTyHGw9/Ops+NzrxxyTD1MZ8z\ng8pqGng3fw+vL9nFnhIjvpMQa+PC0cdz1cm9Kdq1GUd6L/J3lVCws5TVe8vZUlTFlqIq3lpmWHVZ\nKfGc1DuDvN4ZdGm006xtYFo2XPA0vH2F4X7LOcW3Ku0Isv5wA1NjjPVVVHusDpO4kZfA1+uZ4vyB\nDfvD14lY3FYtCK3lYbmtArA8Dm812l8oG0y5u9VpVtPCoAXL3clpcl1ZQfmAaaimdv6D6CdPYmLF\nJzixsXfgL4m/bRVMvIX61F5wxt/g1rVwxdsw8Dyjf9f6j42Yw+Oj4NuHoXyvT5ebZva6+mzN/ub+\nf2/s/AGeOxVWv40jJhGunI09ayB3fmBUi//m1H707+Z7BXjn5DgeuNBYa+Xhzzexu/NYo1W5isGp\nbMwu609qQixjcgJzCbVkdM90uqYmsLeslp3lHppyKmW0Q087HgqXNWXxuVNfCZ+Y9RVn/K1t68Rk\n7d5y/vz+KsY9+BUPfrqRPSW1nJCZxJ3nD2bpnVP456UjGX58Zzon2DhraHfuOG8I798wgTUzz2b2\nDRO48/zBnDOsO106xVNc3cD89Qf5x2cb+eP8Ym5+awWFpW49ugafD3nXgdMOs38FDWFqRxMg24oq\nOdNyWQWSZdUSUwGdZSvgx23h6+UmlkcLrGViQ1kkWBNIzMPV/PCXhgupFaw02qD0tWpJ76ag+ez6\nANMCnU5Y9RaNC+4jqdrw0X5rG0vPn/4f/QaPPnJ+TCwMOs94VeyDlW/Aj68ZqbLfPAAL/wEDzqZz\n54ngHN1qlfOg7qkM6NaJLUVVfL/1sPfiu6oi+PIuWP228Tkjhy1DbmNwz1xe+nYbmw5W0iszmZvO\n6O/3V3D2sOOYNqoHc1ftY8bs1bz565+grpnH2wuWcmhLBucP7NLuRAELm01x5pBuvLVsD8v31nOp\np0nJmXDJLHjlJ/DDvyFnEgw4s2n/13+HikIjSD3ut61eq6HRyf921/LgskXNKp5PG9iVqyf05rSB\n3by6UhNiY8jtnUFub2PVRK01u0tqyN9ZyrIdJXz44x7mrtrHl+sO8KtJfbhhcn86JcTC2X83AvmH\nNhoFkNMe9+drCisZRUtJUfVUdxlFSkZO+0+Y2ZeStKFkVqynbsPnkBeeDDixPFrgsjxC0p4ksJhH\ncukG4xd3bCJMvqPNuSXVhvwhsTy6DaUmJo3jVTG2qgAyl7Z/C8+fCh//jtjqA6xx5jCj04MM+v1c\nz4qjJWk9jIK0W1bBVR/C0AuNNvSbP6f/8rvg8dFmQsGRBVNKqaZOu20VDDoaYenz8GSeoThiEozv\n/HdLqM4aQWFpDf9esAWA+y8cHlDlO8C904eRlRLP4u3FvLlsN/SewHuVhkUSaFV5a1hZV8v21bU+\nqffJRs0QwIfXu6rgk0s3wNLnjO95mucWJHvLannki01MeOhr/r20nIJdpaQmxvL/Jvbhmz9N5tX/\nN5YzBncPKAanlKJ3VgqX5Pbkn5eO5PFzuzJtVA/qG508/c02Tn9kIe/m78EZm2QowJgEYz2RQFrh\nh4Giyjom2I0VAxNHe1TlAaHNwHnOgS/Dtky0KI8WhLQxonnOGn9iHlrTc8MLxva43xoPUA+UVjfw\n+pJdvLLI8J0HrbrcHZuNvWknAtCtoo0mf06nEVgt2w37V8O2r+m37E5jXYYDa9ivM7mt4QYe7f0c\nd938G47r7HlJ2bbkoN8Z8NNXjYWGzv47dSnHG9Xb8++Cx4bC3FuNCnA3LNfVl+sPUudpKd09y+GF\nyfDZ7cZiRgPOhhuXwOS/QFwSWmvu+XgdtXYHU0dm+7xAkycyU+K59wIjbvSPTzey7VAV64oMq3Fy\nkJXHhH5dSI6PYUdZI+v3teETn3gb9D0daooN94+9jt6rHgU0TLjJCKibOJ2abzcf4lev5jPpn1/z\n1DdbOVxVT6/OsTx40QiW3jmFu6cNpU+XlKDeS7eUGJ684kRm3zCB0Sekc6iynj+/v5ppT33Pkpps\nox08wJybfXZphpMftxRyhm0lADEj2h/vsMgc+1MATnEWUFReFbTztoW4rdzQWrvak7RZ51FTYgQZ\nty+EHd8xsqoYFnczzP+kTEjOMAKrSZlNY0kZdI5PpxulOOxxULEf7DVGmwV7ndt2rfFqNN/LC0kt\nXgmJ6XDKrc3EqG1wsGDDQT5euZeFmw7RaKahJsaodj3Y2qK46xgGlH7L+LK5MKcUassMRVFnvteW\nGcVnurl1lQ40xCTzRP1UXmw8j4vGDuDFC4YR2173TEoXmHAz6+LHk5tWAkufNTJvCl42Xn0nG0p3\nwNnkdElhZM/OrC4s55uNRbge0dXFRnfiFa8bnzv3gvMegkHnNwsML91bz1cby0hNiOXuqe1Yr8Lk\nJyOymTdsP5+vO8AvXlxKg9PIDPv/7Z17eFTVtcB/a5KZTEJCCCFUQAhGIVR5iAkvpYrotVzUT/Qq\ntlVLva3P8tnWXm2/2tZbb3t9XKkt1qvFd7XWqtX6uFakUlAEVF6CgGBJRBCQBAKJIY+ZzLp/7D0w\njDOTnGQSEt2/7zvfea299zr7nLPX2XufvXZRXlaH444l6M9gSmkRL6/bxfS5b3BsUS+mlPZnSmkR\n44b2PVR78vnggnlw32TTBPTAGeTUVUDB0IPu/mvqm3l65Tb++NZHbN1j+h38GcI5Iwdw6cRifHsq\nKC9vvU+ko5QVF/DsNSfzwrs7uP2V91m/o5avzVvOtOPLuat4KtlbF8JzV8HI5P2DR4K6tS+RLc3s\n6D2agfkdGwAbixQUU5F1PCVNGwh9+CYwJW1xJ6PHGA8RmQb8FsgAHlDV29KdxoHmFloiSiCDw1xM\nEGqEbcuNsahYBDvWAIdqD36AptZ9y2QBb0c/sn/tUbmvXA/ZBbRElKVbqvnr6h3MX7/roN+i6FSl\nM8YOpKh5JxMTjHROB/UDT4bNMCRUAatSuLYI5EIwH4L5aDCff+wt5EfV06miDz+aNoKrTyvx7C02\nJeKD4V81S9UmeHserPnToXtWMBTGX8kFx09m7fb9vLh2B98ujRgfT3//hTF+Pj+ccp1xNBk4fIzF\np01hHlxjvtpvnFZK/94ea0uJVBbhlhknsKxiDzv3myaldP1lFc8Pzyplz94a1le3sKWqni1VlTy4\npJJsfwaTji1kSmkRU4b3Z0hhf7jgfvjDecaBIqBn38XqXU08vnwzL63dSXPYfBgM6pPNNyYMYWb5\n4IMGb+Xeyk7RPxE+nzBj7CC+esJR3P9GBfcu2sIrGz5hTcbF/D1nDbkfvsFRWU9A6RD7QWY/0kKN\nhz7ODttuYMC2SqidbxyPasQuLWaskUZijkfXytH76uHT0oPPO8E+Zp3d59C+PxtEGPjx3wBoLp2R\n9vzYXTydks0bGLR7MXBT2uOPp0cYDxHJAO4B/gXYDrwjIi+oalrdah7sLPdj3EdEC56PlpsHL0pG\nAAZPgJLToOR01m7dw+jjBpvBewf2mjb3g9t74YDdb6ihumoXAW1G/dlEMoJoZhD82fgC2WQEcsjM\n6kUgmENGINv8FukP8lFNiNohl/DcSxt48d0d7I6Zs3jM0fnMGDuIc0YPPPQCr+y80ba+AaO4rvm7\nlPXex8zTxlIvedSSwz7txd6WIFWhbHY3B6lpUmoONFNzIMTHNQfYUl1PIMPH3JljDvY9dBpFpXD2\nHJj6M/Or79vzoOZDmP8TvunPQTJPYdn7JzJ89wuw305vWjIFpt8J/YYdFpWqEmpR7py/ib0NEcYM\n7sM3Jnh3D5+M/nlBbj73eK5/yri7T3d/R5Rji3K58eQCRp84lpVba1i8uYpFm6rYuLOWhe/vZuH7\nu4H1HNOvF6cNL+JbI2cz9L27WdfnDH78sp/1O5YCpiI2pbSIyyYWM6W09Q7wriA7kMF1ZwxjZvlg\n/mf+Jv6yajvfrb+CRwO3M2jTw7Dp4TbHNRDA44y3XwJoxUVYmEzqpRcTInVEEI6a9Fm3Qh2lT/lM\nIpvmMKpplfGbFkzwa3Ya6RHGAxgP/FNVKwBE5EngPCCtxqNx+7vc4/8Nk9kA8+LaDY8aZQqYkikw\nZBIEDrXlhj5Z2eZpN6++dykrttZAK+6EcrMy6dsrQN9eAXbvq2PHqrcOnisuzGHGiYM478SBlBTl\ntinddNE3J8ALkVN4YR/c/HzsmUa7JPYflRsQHrp8AuOPSc8vqG0iu49pq594jfF/9NZ9+CoXMytz\nAbNYAPthtxQyN/NyFmybSPjeDwlHKgm3RAhZ1xmxI9J9Av99/si0F5jnjx3Ehh21VH68i1GD8tMa\ndzz+DB8TSwqZWFLIj6aN4JPaRhZvrmLxpipe/6CKyup6KqvreYSJjJIvsXHXEMLUUpDjZ+a4wVwy\nvrjDI987i6Pyg8yZOYZZJxfzXy8V8JvtH3B5xis0k0mjZtFIgEb8NJBFowZoIkADARrVrEOSRUj8\nhDSDsEJYfSg+Iggt+FDk4HbELgA5NJIv9fSm3q4PxK3rCUqIfN0PAiv8ZZT3TV+TVZRhxw1nJSMY\nqVvYs2UlhSecnvY0YpGu6pnvCCJyITBNVb9j9y8DJqjq7Di5K4ErAQYMGFD24osvekrnk60bmb7W\njFBtyv4StUVl1PUro67fWMJZfZKGO3DgADk5bXuh6pojrNpWRxN+apsi7LdLbdzSEndbemf5OGVw\nkFOHBBnW15+yyceLPl7DNIQiXDe/mr0NEYKZQl5AyAv4yA34yMvy2W0hL8seC/jICwiFmSEK89ve\neer1GtoqH6ytJLzuaXKr3+XvkZOYG76AelJ7hc0Q8PuEs48N8I0xBWnXqbvIt0SUzXtDrN7VxOqd\nTVTsC3NcnwymD89l0tFBAhmtG80jfQ1RVJXlHzexfFs9LfhoboHmFqUprGbdcvg60dTQPkx/jt8H\nmXbt98lh2/4MIZMIvbIyCWbKZ5asTCE700cvX4g86smlgWBuAXm9k5cnHcmjhxZvoKIxl29PGMAx\nfbz/MVpeXr5SVcvbJKyq3X4BLsL0c0T3LwPuThWmrKxMvdLQ1Ky7Ft6nr/7fs6qRSJvDrVixwlM6\nrclHIhHdd6BZK6o+1RUf7tE//G2pNodbOk0fr2FC4RZd9tY7nRZ/Z8tHIhHdWl2vf124XCuqPtWP\n9tTrzn0Nuru2UWvqm7SuMaQNzWENhVs0EvMcdKdr6Ar5cEuk2+nUWfKRSEQbmsO6eOnbWt8U0lAn\nvm+dKd/SjnsWC7BC21gu95Rmq+1A7IxARwNpH0oZDPgJnn4VfVeu9DYxS5oREfKz/eRn+82vjtWB\ntA0aSweZGT78bfgK7a6ICEMKc6jqnZn2X0k/T3SH/oyuQkQI+jPoFfC1ew6V7oCvC+9Z9ymRUvMO\nMExEjhGRAPA1oHuOAnI4HI4vAD3CxKpqWERmA/Mxv+o+pKrrj7BaDofD8YWlRxgPAFV9GXj5SOvh\ncDgcjp7TbOVwOByOboQzHg6Hw+HwjDMeDofD4fCMMx4Oh8Ph8EyPGGHeHkSkCtjazuD9gOovkHxX\npNHd5LsijZ4u3xVp9HT5rkijK64hSrGqts0ld1tHE36RFjyMsvw8yHdHndw1H3n57qhTd5Pvjjq1\n5xras7hmK4fD4XB4xhkPh8PhcHjGGY/EzPuCyXdFGt1NvivS6OnyXZFGT5fvijS64ho887ntMHc4\nHA5H5+FqHg6Hw+HwjDMeDofD4fCMMx4Oh6PdiGFw65KOzxvOeHQAESkQkfEicmp0SSEbFJHrReRZ\nEfmLiPxARIJdqW+cPiIil4rIz+3+EBEZn0DuMbv+XlfrmA5EZIld14lIbdyyX0QqReTaFOHLEhw7\ntzN1TjciMkZEZttlTCuynp5TNZ2mf/Woz0Uikme3f2rTOimJ7O1tOdYR7DW2eVJxEXlMRK4QkRFt\nlD8+wbEpKeRni0jb5zs2YV4Tkelxxzq149x1mLcTEfkO8D3MrIZrgInAMlWdmkT+KaAOeNwe+jpQ\noKoXJZF/FPiequ6z+wXAHFX99zi561Ppqaq/ThL/vUAEmKqqX7bxv6qq4+LkNgD/ipl8awpw2FRl\nqro3WdpJdNsPrFTVNQnks4B/A4YSM12Aqt6SLI2OIiKFwFJVLU1yfhUwS1XX2f2vA99X1QlpSr8c\nuAkoxlyzYMrk0UnkPeWRNfpXAM/aQ+cD81T17iTynp5TG+Ye4BFVfSeZTJz8WlUdLSKTgVuBO4Gf\nJMpTEVmlqiclCp8i/p8nOp4ij24GZgJ7gSeBZ1T1kxTxTwUmA18BSjDv/+uq+tsk8u8BjwF3AEG7\nLlfVSUnkf4mZ8G4V8BAwX1spqEWkAtgGLFTVX9hjn8m7dOKMRzsRkXXAOGC5qp5ov0J+oaoXJ5F/\nV1XHtHYs5txqVR3bhmM3281Sq090hsVzMQ/0d5LEv0pVT4qNM4mO1wHXYF6SjznceKiqliSK34Z9\nAigHXrSHzsbMCjkCeFpV74iTfwVrXICWmETmxMktUdXJIlIHxD7A0YK3dzKdkug5QFV3JjlXAjwD\nXIIpML4JnKOq++Pk4nVpk04isgm4AViHMeZgAiR0rdPWPIqRXwtMUtV6u98L85GTzDh5ek7t+Q3A\ncIw7oHpaN4CrVXWsiNwKrFPVJ+KfbRG5BrgW89xtiQmeB7ypqpem0OeHMbtB4BxgY/yHV4Jwo4GL\nMcZ5u6qemUI2A/O+nQ5cDTSoasKaiM3z24Eyq/8fgdtVNZJI3oYR4Czgcsw79BTwoKpuSSK/ChgP\nzMVM2X0p8I/ONB49ZjKobkijqjaKCCKSparvi0jCr1fLahGZqKrLAURkAvBmCnmfiBSoao2V70uC\n+xXzlfEqcJKq1tn9/wSeThF/yL4AauWLiCm8YuKfC8wVkXtV9ZoU8SWi0Or0qU3jZkxBfCqm8Lsj\nTv5oVZ3WWqSqOtmu8zzqkyy+hIbDnqsQka9hmma2AWepakMCufbqUqWqXqZUblMexSDEGBm7nWqi\na6/PKZiaqRc+FpHfA2cCt9vaVHwT+hPA3zA1kx/HHK9LVduFhB8bd9K2aat3A7uAPUD/ZEIi8hrQ\nC1gGvAGMU9XdKeINAQ1ANsaYVaYyHPYaVER2WX3CQAHwjIgsUNUbE6mlqmHgWhH5FrDEhuk0nPFo\nP9tFpA+mUFkgIjXAjnghW0NRwA98U0Q+svvFwIYU8c8BlorIM1Z+JvCrFPJDgOaY/WZM00Yy5gLP\nAf1F5FfAhcBPkwm3w3Ak0imEcbzWICJNCeSXisioaBPRkSTmvkXpi5kC+S0RIVWziUduFpEHgNeA\ng3miqs8mkfeaRw9jdH7O7s8AHkwhP4FDzymYe7gxmh+JrjtZLSkFM4FpwJ2quk9EBmBqX7Fx7sfU\nsL7uMe5E5GBqMAmxtZyLgSLMx80Vqprq3VyLqUWMtDruE5FliT4qLO8Az2NqKoXA70XkQlW9MIk+\n1wGzMM4NHwBuUNWQiPiAD4BExuO+6IaqPmLv13dTXEOHcc1WaUBETgPygVdUtTnuXHGqsKlePNvR\nNhXzpfhaqgdaRG7CvJTPYQq984E/q+qtKcKMAM6IiX9jKl29IiI/s3o8bw+di/kCnINpd7/EykUL\n6kxgGFCBKUhTNn90Jh25bx7TeRzTjLeeQzU/TdbEYpuIjgMqaWMeiemMnmxlX1fV1Slku+S6O5M4\nw5+BMQq3qOrvksjfBjyZqB+ulXRyMc1K/wEcpapZSeTKVXVF3LHLVPWxJPK3YJqoPpPXIvLldL+n\n7cUZj88RtpD4it1NWUh0FWL+VooWXEviXyIr0+MLrPYiIutUdZQH+YR59XnOI6/E5VEY+MQ26aQr\n/tmY96wM08/zOvCGqi5MVxo9AWc8HI4jiIjcD9zVSjOJoxshIjdgDMbKdBqlnoYzHg7HEURENgLH\n4qEZyuHoDjjj4XAcQVwzlKOn4oyHw+FwODzj3JM4HA6HwzPOeDgcDofDM854OBytICI3ich6EVkr\nImvsqOvOSmuRGH9XDke3xo0wdzhSICKTML6RTlLVJhHpBwSOsFoOxxHH1TwcjtQMAKpVtQlAVatV\ndYeI/FxE3hGR90RknnVkF6053CUir4vIRhEZJ8bl+AdivKUiIkNF5H0RedTWZp4RkZz4hEXkLBFZ\nJiKrRORpO6IZEblNRDbYsHd2YV44HAdxxsPhSM2rwGAR2Swi/2td0QD8TlXHqepIjMO7c2LCNKvq\nqRh/Q89jfAyNBL4lxgU8GC/I8+x4jlqMB9mD2BrOT4EzrWfUFcD11kHm+cAJNuwvO+GaHY5WccbD\n4UiB9QhcBlwJVAF/tl5LTxeRt6wfpanACTHBoh5c1wHrVXWnrblUYNxlA2xT1ai32scxLlximQgc\nD7wpImswjvKKMYamEXhARC4ADqTtYh0OD7g+D4ejFVS1BVgELLLG4ipgNGZCn23W/X3sbHtR77iR\nmO3ofvSdix9gFb8vwAJV/YxXWTEzPp6BmTBoNsZ4ORxdiqt5OBwpEJFSERkWc+hEYJPdrrb9EAld\na7fCENsZD8bt+JK488uBU0TkOKtHjogMt+nlq+rLwPetPg5Hl+NqHg5HanKBu+3cLWHgn5gmrH2Y\nZqkPMfM1eGUjMMtOivQBcG/sSVWtss1jf7KTJYHpA6kDnhczr7gAP2hH2g5Hh3HuSRyOLkZEhgIv\n2c52h6NH4pqtHA6Hw+EZV/NwOBwOh2dczcPhcDgcnnHGw+FwOByeccbD4XA4HJ5xxsPhcDgcnnHG\nw+FwOBye+X+N13Xb3ljaAQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x109000710>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#名字语料库，包括 8000 个按性别分类的名字。男性和女性的名字存\n",
    "#储在单独的文件中\n",
    "from nltk.corpus import names\n",
    "cfd = nltk.ConditionalFreqDist(\n",
    "    (fileid, name[-1])\n",
    "    for fileid in names.fileids()\n",
    "    for name in names.words(fileid))\n",
    "cfd.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(u'explorer', [u'IH0', u'K', u'S', u'P', u'L', u'AO1', u'R', u'ER0'])\n(u'explorers', [u'IH0', u'K', u'S', u'P', u'L', u'AO1', u'R', u'ER0', u'Z'])\n(u'explores', [u'IH0', u'K', u'S', u'P', u'L', u'AO1', u'R', u'Z'])\n(u'exploring', [u'IH0', u'K', u'S', u'P', u'L', u'AO1', u'R', u'IH0', u'NG'])\n(u'explosion', [u'IH0', u'K', u'S', u'P', u'L', u'OW1', u'ZH', u'AH0', u'N'])\n(u'explosions', [u'IH0', u'K', u'S', u'P', u'L', u'OW1', u'ZH', u'AH0', u'N', u'Z'])\n(u'explosive', [u'IH0', u'K', u'S', u'P', u'L', u'OW1', u'S', u'IH0', u'V'])\n(u'explosively', [u'EH2', u'K', u'S', u'P', u'L', u'OW1', u'S', u'IH0', u'V', u'L', u'IY0'])\n"
     ]
    }
   ],
   "source": [
    "#发音的字典\n",
    "entries = nltk.corpus.cmudict.entries()\n",
    "for entry in entries[39943:39951]:\n",
    "    print entry"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    ""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2.0
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}